Reintegrated 'aligned_stack' branch. General summary [interesting to Devs only, really]:

* EEcore recompiler aligns the stack on entry for all platforms.
* IOP recompiler aligns the stack for GCC/Mac by default (can be force-enabled for all platforms via a compiler define).
* Added setjmp/longjmp to the EEcore recompiler, used by GCC to exit the recompiler efficiently (Win32 platforms use SEH).
* aR3000a.S and aR5900.S removed and replaced with x86Emitter-generated dispatchers.
* All C functions called from recompiled code use __fastcall (simple, fast, and retains stack alignment in a neat fashion).


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2054 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-10-21 21:16:07 +00:00
commit e4330ee0ee
47 changed files with 1042 additions and 1107 deletions

View File

@ -66,8 +66,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add directory="../../include/Utilities" />
<Add directory="../../include" />

View File

@ -67,8 +67,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add directory="../../include/x86emitter" />
<Add directory="../../include" />

View File

@ -38,7 +38,7 @@
Console.Error( ex.what() ); \
}
#ifdef __GNUC__
#ifdef __GNUG__
# define DESTRUCTOR_CATCHALL __DESTRUCTOR_CATCHALL( __PRETTY_FUNCTION__ )
#else
# define DESTRUCTOR_CATCHALL __DESTRUCTOR_CATCHALL( __FUNCTION__ )
@ -88,7 +88,7 @@ namespace Exception
// Returns a message suitable for end-user display.
// This message is usually meant for display in a user popup or such.
virtual wxString FormatDisplayMessage() const { return m_message_user; }
virtual void Rethrow() const=0;
virtual BaseException* Clone() const=0;
@ -176,7 +176,7 @@ namespace Exception
// instead issues a "silent" cancelation that is handled by the app gracefully (generates
// log, and resumes messages queue processing).
//
// I chose to have this exception derive from RuntimeError, since if one is thrown from outside
// I chose to have this exception derive from RuntimeError, since if one is thrown from outside
// an App message loop we'll still want it to be handled in a reasonably graceful manner.
class CancelEvent : public virtual RuntimeError
{
@ -188,13 +188,13 @@ namespace Exception
m_message_diag = fromUTF8( logmsg );
// overridden message formatters only use the diagnostic version...
}
explicit CancelEvent( const wxString& logmsg=L"No reason given." )
{
m_message_diag = logmsg;
// overridden message formatters only use the diagnostic version...
}
virtual wxString FormatDisplayMessage() const;
virtual wxString FormatDiagnosticMessage() const;
};

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -12,7 +12,7 @@
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <semaphore.h>
@ -62,7 +62,7 @@ namespace Threading
// --------------------------------------------------------------------------------------
// The following set of documented functions have Linux/Win32 specific implementations,
// which are found in WinThreads.cpp and LnxThreads.cpp
// Returns the number of available logical CPUs (cores plus hyperthreaded cpus)
extern void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhysicalCPU );
@ -146,6 +146,7 @@ namespace Threading
void WaitRaw();
bool WaitRaw( const wxTimeSpan& timeout );
void WaitNoCancel();
void WaitNoCancel( const wxTimeSpan& timeout );
int Count();
void Wait();
@ -161,7 +162,7 @@ namespace Threading
MutexLock();
virtual ~MutexLock() throw();
virtual bool IsRecursive() const { return false; }
void Recreate();
bool RecreateIfLocked();
void Detach();
@ -173,10 +174,10 @@ namespace Threading
void LockRaw();
bool LockRaw( const wxTimeSpan& timeout );
void Wait();
bool Wait( const wxTimeSpan& timeout );
protected:
// empty constructor used by MutexLockRecursive
MutexLock( bool ) {}
@ -217,7 +218,7 @@ namespace Threading
// --------------------------------------------------------------------------------------
// PersistentThread - Helper class for the basics of starting/managing persistent threads.
// --------------------------------------------------------------------------------------
// This class is meant to be a helper for the typical threading model of "start once and
// This class is meant to be a helper for the typical threading model of "start once and
// reuse many times." This class incorporates a lot of extra overhead in stopping and
// starting threads, but in turn provides most of the basic thread-safety and event-handling
// functionality needed for a threaded operation. In practice this model is usually an
@ -230,7 +231,7 @@ namespace Threading
// void OnStart();
// void ExecuteTaskInThread();
// void OnCleanupInThread();
//
//
// Use the public methods Start() and Cancel() to start and shutdown the thread, and use
// m_sem_event internally to post/receive events for the thread (make a public accessor for
// it in your derived class if your thread utilizes the post).
@ -247,14 +248,14 @@ namespace Threading
protected:
typedef int (*PlainJoeFP)();
wxString m_name; // diagnostic name for our thread.
pthread_t m_thread;
Semaphore m_sem_event; // general wait event that's needed by most threads.
MutexLock m_lock_InThread; // used for canceling and closing threads in a deadlock-safe manner
MutexLockRecursive m_lock_start; // used to lock the Start() code from starting simultaneous threads accidentally.
volatile long m_detached; // a boolean value which indicates if the m_thread handle is valid
volatile long m_running; // set true by Start(), and set false by Cancel(), Block(), etc.
@ -285,7 +286,7 @@ namespace Threading
// Start() once necessary locks have been obtained. Do not override Start() directly
// unless you're really sure that's what you need to do. ;)
virtual void OnStart();
virtual void OnStartInThread();
// This is called when the thread has been canceled or exits normally. The PersistentThread
@ -327,7 +328,7 @@ namespace Threading
static void* _internal_callback( void* func );
static void _pt_callback_cleanup( void* handle );
};
//////////////////////////////////////////////////////////////////////////////////////////
// ScopedLock: Helper class for using Mutexes.
// Using this class provides an exception-safe (and generally clean) method of locking
@ -370,7 +371,7 @@ namespace Threading
m_lock.Lock();
m_IsLocked = true;
}
bool IsLocked() const { return m_IsLocked; }
protected:
@ -380,9 +381,9 @@ namespace Threading
, m_IsLocked( isTryLock ? m_lock.TryLock() : false )
{
}
};
class ScopedTryLock : public ScopedLock
{
public:

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -18,6 +18,17 @@
// Implementations found here: CALL and JMP! (unconditional only)
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
#ifdef __GNUG__
// GCC has a bug that causes the templated function handler for Jmp/Call emitters to generate
// bad asm code. (error is something like "7#*_uber_379s_mangled_$&02_name is already defined!")
// Using GCC's always_inline attribute fixes it. This differs from __forceinline in that it
// inlines *even in debug builds* which is (usually) undesirable.
// ... except when it avoids compiler bugs.
# define __always_inline_tmpl_fail __attribute__((always_inline))
#else
# define __always_inline_tmpl_fail
#endif
// ------------------------------------------------------------------------
template< bool isJmp >
class xImpl_JmpCall
@ -30,11 +41,11 @@ public:
__forceinline void operator()( const xRegister16& absreg ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, absreg ); }
__forceinline void operator()( const ModSibStrict<u16>& src ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, src ); }
// Special form for calling functions. This form automatically resolves the
// correct displacement based on the size of the instruction being generated.
template< typename T >
__forceinline void operator()( T* func ) const
template< typename T > __forceinline __always_inline_tmpl_fail
void operator()( T* func ) const
{
if( isJmp )
xJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func, false ); // double cast to/from (uptr) needed to appease GCC
@ -42,7 +53,7 @@ public:
{
// calls are relative to the instruction after this one, and length is
// always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic).
sptr dest = (sptr)func - ((sptr)xGetPtr() + 5);
xWrite8( 0xe8 );
xWrite32( dest );

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -188,6 +188,7 @@ namespace x86Emitter
// ----- Miscellaneous Instructions -----
// Various Instructions with no parameter and no special encoding logic.
extern void xLEAVE();
extern void xRET();
extern void xCBW();
extern void xCWD();
@ -244,7 +245,7 @@ namespace x86Emitter
// ------------------------------------------------------------------------
// Forward Jump Helpers (act as labels!)
#define DEFINE_FORWARD_JUMP( label, cond ) \
template< typename OperandType > \
class xForward##label : public xForwardJump<OperandType> \
@ -328,7 +329,7 @@ namespace x86Emitter
DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven );
DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd );
typedef xForwardJPE<s8> xForwardJPE8;
typedef xForwardJPE<s32> xForwardJPE32;
typedef xForwardJPO<s8> xForwardJPO8;
@ -362,7 +363,7 @@ namespace x86Emitter
extern void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src );
extern void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from );
extern void xMOVSS( const xRegisterSSE& to, const xRegisterSSE& from );
extern void xMOVSS( const ModSibBase& to, const xRegisterSSE& from );
extern void xMOVSD( const xRegisterSSE& to, const xRegisterSSE& from );
@ -421,12 +422,12 @@ namespace x86Emitter
extern void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 );
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;
extern const Internal::SimdImpl_AndNot xANDN;
extern const Internal::SimdImpl_COMI<true> xCOMI;
@ -434,7 +435,7 @@ namespace x86Emitter
extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
extern const Internal::SimdImpl_MinMax<0x5f> xMAX;
extern const Internal::SimdImpl_MinMax<0x5d> xMIN;
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;
@ -442,7 +443,7 @@ namespace x86Emitter
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
@ -484,9 +485,9 @@ namespace x86Emitter
extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
@ -502,7 +503,7 @@ namespace x86Emitter
extern const Internal::SimdImpl_PUnpack xPUNPCK;
extern const Internal::SimdImpl_Unpack xUNPCK;
extern const Internal::SimdImpl_Pack xPACK;
extern const Internal::SimdImpl_PAbsolute xPABS;
extern const Internal::SimdImpl_PSign xPSIGN;
extern const Internal::SimdImpl_PInsert xPINSR;
@ -513,7 +514,7 @@ namespace x86Emitter
extern const Internal::SimdImpl_Blend xBLEND;
extern const Internal::SimdImpl_DotProduct xDP;
extern const Internal::SimdImpl_Round xROUND;
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
extern const Internal::SimdImpl_PMove<false> xPMOVZX;

View File

@ -31,11 +31,6 @@ typedef int x86IntRegType;
#define EBP 5
#define ESP 4
#define X86ARG1 EAX
#define X86ARG2 ECX
#define X86ARG3 EDX
#define X86ARG4 EBX
#define MM0 0
#define MM1 1
#define MM2 2

View File

@ -163,6 +163,14 @@ void Threading::Semaphore::WaitNoCancel()
pthread_setcancelstate( oldstate, NULL );
}
void Threading::Semaphore::WaitNoCancel( const wxTimeSpan& timeout )
{
int oldstate;
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
WaitRaw( timeout );
pthread_setcancelstate( oldstate, NULL );
}
int Threading::Semaphore::Count()
{
int retval;

View File

@ -185,7 +185,7 @@ void Threading::PersistentThread::Cancel( bool isBlocking )
}
pthread_cancel( m_thread );
}
if( isBlocking )
@ -212,7 +212,9 @@ void Threading::PersistentThread::Block()
bool Threading::PersistentThread::IsSelf() const
{
return pthread_self() == m_thread;
// Detached threads may have their pthread handles recycled as newer threads, causing
// false IsSelf reports.
return !m_detached && (pthread_self() == m_thread);
}
bool Threading::PersistentThread::IsRunning() const
@ -387,10 +389,10 @@ void Threading::PersistentThread::OnStartInThread()
void Threading::PersistentThread::_internal_execute()
{
m_lock_InThread.Lock();
_DoSetThreadName( m_name );
OnStartInThread();
_DoSetThreadName( m_name );
_try_virtual_invoke( &PersistentThread::ExecuteTaskInThread );
}
@ -409,7 +411,7 @@ void Threading::PersistentThread::OnCleanupInThread()
// callback function
void* Threading::PersistentThread::_internal_callback( void* itsme )
{
jASSUME( itsme != NULL );
pxAssert( itsme != NULL );
PersistentThread& owner = *((PersistentThread*)itsme);
pthread_cleanup_push( _pt_callback_cleanup, itsme );

View File

@ -729,6 +729,7 @@ __forceinline void xPOPFD() { xWrite8( 0x9D ); }
//////////////////////////////////////////////////////////////////////////////////////////
//
__forceinline void xLEAVE() { xWrite8( 0xC9 ); }
__forceinline void xRET() { xWrite8( 0xC3 ); }
__forceinline void xCBW() { xWrite16( 0x9866 ); }
__forceinline void xCWD() { xWrite8( 0x98 ); }

View File

@ -38,7 +38,7 @@ __releaseinline void UpdateCP0Status() {
cpuTestHwInts();
}
void WriteCP0Status(u32 value) {
void __fastcall WriteCP0Status(u32 value) {
cpuRegs.CP0.n.Status.val = value;
UpdateCP0Status();
}
@ -221,7 +221,7 @@ __forceinline void COP0_UpdatePCCR()
//if( cpuRegs.CP0.n.Status.b.ERL || !cpuRegs.PERF.n.pccr.b.CTE ) return;
// TODO : Implement memory mode checks here (kernel/super/user)
// For now we just assume user mode.
// For now we just assume kernel mode.
if( cpuRegs.PERF.n.pccr.val & 0xf )
{

View File

@ -16,7 +16,7 @@
#ifndef __COP0_H__
#define __COP0_H__
extern void WriteCP0Status(u32 value);
extern void __fastcall WriteCP0Status(u32 value);
extern void UpdateCP0Status();
extern void WriteTLB(int i);
extern void UnmapTLB(int i);

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -45,7 +45,7 @@ SyncCounter hsyncCounter;
SyncCounter vsyncCounter;
u32 nextsCounter; // records the cpuRegs.cycle value of the last call to rcntUpdate()
s32 nextCounter; // delta from nextsCounter, in cycles, until the next rcntUpdate()
s32 nextCounter; // delta from nextsCounter, in cycles, until the next rcntUpdate()
void rcntReset(int index) {
counters[index].count = 0;
@ -64,7 +64,7 @@ static __forceinline void _rcntSet( int cntidx )
// Stopped or special hsync gate?
if (!counter.mode.IsCounting || (counter.mode.ClockSource == 0x3) ) return;
// check for special cases where the overflow or target has just passed
// (we probably missed it because we're doing/checking other things)
if( counter.count > 0x10000 || counter.count > counter.target )
@ -80,17 +80,17 @@ static __forceinline void _rcntSet( int cntidx )
c = ((0x10000 - counter.count) * counter.rate) - (cpuRegs.cycle - counter.sCycleT);
c += cpuRegs.cycle - nextsCounter; // adjust for time passed since last rcntUpdate();
if (c < nextCounter)
if (c < nextCounter)
{
nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
// Ignore target diff if target is currently disabled.
// (the overflow is all we care about since it goes first, and then the
// (the overflow is all we care about since it goes first, and then the
// target will be turned on afterward, and handled in the next event test).
if( counter.target & EECNT_FUTURE_TARGET )
if( counter.target & EECNT_FUTURE_TARGET )
{
return;
}
@ -98,11 +98,11 @@ static __forceinline void _rcntSet( int cntidx )
{
c = ((counter.target - counter.count) * counter.rate) - (cpuRegs.cycle - counter.sCycleT);
c += cpuRegs.cycle - nextsCounter; // adjust for time passed since last rcntUpdate();
if (c < nextCounter)
if (c < nextCounter)
{
nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
}
}
}
@ -137,7 +137,7 @@ void rcntInit() {
hsyncCounter.Mode = MODE_HRENDER;
hsyncCounter.sCycle = cpuRegs.cycle;
vsyncCounter.Mode = MODE_VRENDER;
vsyncCounter.Mode = MODE_VRENDER;
vsyncCounter.sCycle = cpuRegs.cycle;
UpdateVSyncRate();
@ -148,7 +148,7 @@ void rcntInit() {
// debug code, used for stats
int g_nhsyncCounter;
static uint iFrame = 0;
static uint iFrame = 0;
#ifndef _WIN32
#include <sys/time.h>
@ -190,11 +190,11 @@ static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scans
// Important! The hRender/hBlank timers should be 50/50 for best results.
// In theory a 70%/30% ratio would be more correct but in practice it runs
// like crap and totally screws audio synchronization and other things.
u64 Scanline = Frame / scansPerFrame;
u64 hBlank = Scanline / 2;
u64 hRender = Scanline - hBlank;
info->Framerate = framesPerSecond;
info->Render = (u32)(Render/10000);
info->Blank = (u32)(Blank/10000);
@ -202,14 +202,14 @@ static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scans
info->hRender = (u32)(hRender/10000);
info->hBlank = (u32)(hBlank/10000);
info->hScanlinesPerFrame = scansPerFrame;
// Apply rounding:
if( ( Render - info->Render ) >= 5000 ) info->Render++;
else if( ( Blank - info->Blank ) >= 5000 ) info->Blank++;
if( ( hRender - info->hRender ) >= 5000 ) info->hRender++;
else if( ( hBlank - info->hBlank ) >= 5000 ) info->hBlank++;
// Calculate accumulative hSync rounding error per half-frame:
{
u32 hSyncCycles = ((info->hRender + info->hBlank) * scansPerFrame) / 2;
@ -284,12 +284,12 @@ void frameLimitReset()
// Framelimiter - Measures the delta time between calls and stalls until a
// certain amount of time passes if such time hasn't passed yet.
// See the GS FrameSkip function for details on why this is here and not in the GS.
extern int limitOn;
extern int limitOn;
static __forceinline void frameLimit()
{
// 999 means the user would rather just have framelimiting turned off...
if( /*!EmuConfig.Video.EnableFrameLimiting*/ !limitOn || EmuConfig.Video.FpsLimit >= 999 ) return;
s64 sDeltaTime;
u64 uExpectedEnd;
u64 iEnd;
@ -302,7 +302,7 @@ static __forceinline void frameLimit()
// If the framerate drops too low, reset the expected value. This avoids
// excessive amounts of "fast forward" syndrome which would occur if we
// tried to catch up too much.
if( sDeltaTime > m_iTicks*8 )
{
m_iStart = iEnd - m_iTicks;
@ -344,9 +344,9 @@ static __forceinline void VSyncStart(u32 sCycle)
EECNT_LOG( "///////// EE COUNTER VSYNC START \\\\\\\\\\\\\\\\\\\\ (frame: %d)", iFrame );
vSyncDebugStuff( iFrame ); // EE Profiling and Debug code
if ((CSRw & 0x8))
if ((CSRw & 0x8))
{
if (!(GSIMR&0x800))
if (!(GSIMR&0x800))
{
gsIrq();
}
@ -393,7 +393,7 @@ static __forceinline void VSyncEnd(u32 sCycle)
frameLimit(); // limit FPS
// This doesn't seem to be needed here. Games only seem to break with regard to the
// vsyncstart irq.
// vsyncstart irq.
//cpuRegs.eCycle[30] = 2;
}
@ -411,16 +411,16 @@ __forceinline void rcntUpdate_hScanline()
if (hsyncCounter.Mode & MODE_HBLANK) { //HBLANK Start
rcntStartGate(false, hsyncCounter.sCycle);
psxCheckStartGate16(0);
// Setup the hRender's start and end cycle information:
hsyncCounter.sCycle += vSyncInfo.hBlank; // start (absolute cycle value)
hsyncCounter.CycleT = vSyncInfo.hRender; // endpoint (delta from start value)
hsyncCounter.Mode = MODE_HRENDER;
}
else { //HBLANK END / HRENDER Begin
if (CSRw & 0x4)
if (CSRw & 0x4)
{
if (!(GSIMR&0x400))
if (!(GSIMR&0x400))
{
gsIrq();
}
@ -440,6 +440,8 @@ __forceinline void rcntUpdate_hScanline()
}
}
bool CoreCancelDamnit = false;
__forceinline void rcntUpdate_vSync()
{
s32 diff = (cpuRegs.cycle - vsyncCounter.sCycle);
@ -448,17 +450,24 @@ __forceinline void rcntUpdate_vSync()
if (vsyncCounter.Mode == MODE_VSYNC)
{
eeRecIsReset = false;
#ifndef PCSX2_SEH
if( CoreCancelDamnit || SysCoreThread::Get().HasPendingStateChangeRequest() )
{
longjmp( SetJmp_StateCheck, 1 );
}
#else
mtgsThread.RethrowException();
SysCoreThread::Get().StateCheckInThread();
#endif
if( eeRecIsReset )
{
eeRecIsReset = false;
cpuSetBranch();
// Hack! GCC is unwilling to let us throw exceptions here.
// (Ones in Exception::*, anyways.) Work around it by skipping
// it.
#ifdef _MSC_VER
#ifndef PCSX2_SEH
longjmp( SetJmp_RecExecute, SetJmp_Dispatcher );
#else
throw Exception::ForceDispatcherReg();
#endif
}
@ -479,7 +488,7 @@ __forceinline void rcntUpdate_vSync()
// Accumulate hsync rounding errors:
hsyncCounter.sCycle += vSyncInfo.hSyncError;
if (CHECK_MICROVU0) vsyncVUrec(0);
if (CHECK_MICROVU1) vsyncVUrec(1);
@ -511,20 +520,20 @@ static __forceinline void _cpuTestTarget( int i )
counters[i].count -= counters[i].target; // Reset on target
else
counters[i].target |= EECNT_FUTURE_TARGET;
}
}
else counters[i].target |= EECNT_FUTURE_TARGET;
}
static __forceinline void _cpuTestOverflow( int i )
{
if (counters[i].count <= 0xffff) return;
if (counters[i].mode.OverflowInterrupt) {
EECNT_LOG("EE Counter[%d] OVERFLOW - mode=%x, count=%x", i, counters[i].mode, counters[i].count);
counters[i].mode.OverflowReached = 1;
hwIntcIrq(counters[i].interrupt);
}
// wrap counter back around zero, and enable the future target:
counters[i].count -= 0x10000;
counters[i].target &= 0xffff;
@ -539,14 +548,14 @@ __forceinline void rcntUpdate()
rcntUpdate_vSync();
// Update counters so that we can perform overflow and target tests.
for (int i=0; i<=3; i++)
{
{
// We want to count gated counters (except the hblank which exclude below, and are
// counted by the hblank timer instead)
//if ( gates & (1<<i) ) continue;
if (!counters[i].mode.IsCounting ) continue;
if(counters[i].mode.ClockSource != 0x3) // don't count hblank sources
@ -561,7 +570,7 @@ __forceinline void rcntUpdate()
// Check Counter Targets and Overflows:
_cpuTestTarget( i );
_cpuTestOverflow( i );
}
}
else counters[i].sCycleT = cpuRegs.cycle;
}
@ -619,20 +628,20 @@ __forceinline void rcntStartGate(bool isVblank, u32 sCycle)
switch (counters[i].mode.GateMode) {
case 0x0: //Count When Signal is low (off)
// Just set the start cycle (sCycleT) -- counting will be done as needed
// for events (overflows, targets, mode changes, and the gate off below)
counters[i].mode.IsCounting = 1;
counters[i].sCycleT = sCycle;
EECNT_LOG("EE Counter[%d] %s StartGate Type0, count = %x",
isVblank ? "vblank" : "hblank", i, counters[i].count );
break;
case 0x2: // reset and start counting on vsync end
// this is the vsync start so do nothing.
break;
case 0x1: //Reset and start counting on Vsync start
case 0x3: //Reset and start counting on Vsync start and end
counters[i].mode.IsCounting = 1;
@ -695,7 +704,7 @@ __forceinline void rcntEndGate(bool isVblank , u32 sCycle)
// rcntUpdate, since we're being called from there anyway.
}
__forceinline void rcntWmode(int index, u32 value)
__forceinline void rcntWmode(int index, u32 value)
{
if(counters[index].mode.IsCounting) {
if(counters[index].mode.ClockSource != 0x3) {
@ -714,7 +723,7 @@ __forceinline void rcntWmode(int index, u32 value)
// Clear OverflowReached and TargetReached flags (0xc00 mask), but *only* if they are set to 1 in the
// given value. (yes, the bits are cleared when written with '1's).
counters[index].modeval &= ~(value & 0xc00);
counters[index].modeval &= ~(value & 0xc00);
counters[index].modeval = (counters[index].modeval & 0xc00) | (value & 0x3ff);
EECNT_LOG("EE Counter[%d] writeMode = %x passed value=%x", index, counters[index].modeval, value );
@ -724,17 +733,17 @@ __forceinline void rcntWmode(int index, u32 value)
case 2: counters[index].rate = 512; break;
case 3: counters[index].rate = vSyncInfo.hBlank+vSyncInfo.hRender; break;
}
_rcntSetGate( index );
_rcntSet( index );
}
__forceinline void rcntWcount(int index, u32 value)
__forceinline void rcntWcount(int index, u32 value)
{
EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target );
counters[index].count = value & 0xffff;
// reset the target, and make sure we don't get a premature target.
counters[index].target &= 0xffff;
if( counters[index].count > counters[index].target )
@ -749,7 +758,7 @@ __forceinline void rcntWcount(int index, u32 value)
counters[index].sCycleT = cpuRegs.cycle - change;
}
}
}
}
else counters[index].sCycleT = cpuRegs.cycle;
_rcntSet( index );
@ -795,9 +804,9 @@ __forceinline u32 rcntRcount(int index)
u32 ret;
// only count if the counter is turned on (0x80) and is not an hsync gate (!0x03)
if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3))
if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3))
ret = counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
else
else
ret = counters[index].count;
// Spams the Console.
@ -807,9 +816,9 @@ __forceinline u32 rcntRcount(int index)
__forceinline u32 rcntCycle(int index)
{
if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3))
if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3))
return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
else
else
return counters[index].count;
}

View File

@ -102,6 +102,7 @@ protected:
int m_CopyCommandTally;
int m_CopyDataTally;
volatile bool m_RingBufferIsBusy;
volatile bool m_PluginOpened;
// Counts the number of vsync frames queued in the MTGS ringbuffer. This is used to
// throttle the number of frames allowed to be rendered ahead of time for games that
@ -122,8 +123,6 @@ public:
mtgsThreadObject();
virtual ~mtgsThreadObject() throw();
void OnStart();
// Waits for the GS to empty out the entire ring buffer contents.
// Used primarily for plugin startup/shutdown.
void WaitGS();
@ -145,11 +144,15 @@ public:
protected:
void OpenPlugin();
void ClosePlugin();
void OnStart();
void OnResumeReady();
void OnSuspendInThread();
void OnPauseInThread() {}
void OnResumeInThread( bool IsSuspended );
void OnResumeReady();
void OnCleanupInThread();
// Saves MMX/XMM REGS, posts an event to the mtgsThread flag and releases a timeslice.
// For use in surrounding loops that wait on the mtgs.

View File

@ -115,7 +115,7 @@ void psxMemShutdown()
psxMemRLUT = NULL;
}
u8 iopMemRead8(u32 mem)
u8 __fastcall iopMemRead8(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -159,7 +159,7 @@ u8 iopMemRead8(u32 mem)
}
}
u16 iopMemRead16(u32 mem)
u16 __fastcall iopMemRead16(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -225,7 +225,7 @@ u16 iopMemRead16(u32 mem)
}
}
u32 iopMemRead32(u32 mem)
u32 __fastcall iopMemRead32(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -294,7 +294,7 @@ u32 iopMemRead32(u32 mem)
}
}
void iopMemWrite8(u32 mem, u8 value)
void __fastcall iopMemWrite8(u32 mem, u8 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -356,7 +356,7 @@ void iopMemWrite8(u32 mem, u8 value)
}
}
void iopMemWrite16(u32 mem, u16 value)
void __fastcall iopMemWrite16(u32 mem, u16 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -440,7 +440,7 @@ void iopMemWrite16(u32 mem, u16 value)
}
}
void iopMemWrite32(u32 mem, u32 value)
void __fastcall iopMemWrite32(u32 mem, u32 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;

View File

@ -75,24 +75,24 @@ static __forceinline u8* iopPhysMem( u32 addr )
#define psxHu16(mem) (*(u16*)&psxH[(mem) & 0xffff])
#define psxHu32(mem) (*(u32*)&psxH[(mem) & 0xffff])
void psxMemAlloc();
void psxMemReset();
void psxMemShutdown();
extern void psxMemAlloc();
extern void psxMemReset();
extern void psxMemShutdown();
u8 iopMemRead8 (u32 mem);
u16 iopMemRead16(u32 mem);
u32 iopMemRead32(u32 mem);
void iopMemWrite8 (u32 mem, u8 value);
void iopMemWrite16(u32 mem, u16 value);
void iopMemWrite32(u32 mem, u32 value);
extern u8 __fastcall iopMemRead8 (u32 mem);
extern u16 __fastcall iopMemRead16(u32 mem);
extern u32 __fastcall iopMemRead32(u32 mem);
extern void __fastcall iopMemWrite8 (u32 mem, u8 value);
extern void __fastcall iopMemWrite16(u32 mem, u16 value);
extern void __fastcall iopMemWrite32(u32 mem, u32 value);
// x86reg and mmreg are always x86 regs
void psxRecMemRead8();
void psxRecMemRead16();
void psxRecMemRead32();
void psxRecMemWrite8();
void psxRecMemWrite16();
void psxRecMemWrite32();
extern void psxRecMemRead8();
extern void psxRecMemRead16();
extern void psxRecMemRead32();
extern void psxRecMemWrite8();
extern void psxRecMemWrite16();
extern void psxRecMemWrite32();
namespace IopMemory
{

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -12,7 +12,7 @@
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include <sys/mman.h>
@ -44,14 +44,15 @@ void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * )
// get bad virtual address
uptr offset = (u8*)info->si_addr - psM;
DevCon.Status( "Protected memory cleanup. Offset 0x%x", offset );
if (offset>=Ps2MemSize::Base)
{
// Bad mojo! Completely invalid address.
// Instigate a crash or abort emulation or something.
assert( false );
wxTrap();
if( !IsDebugBuild )
raise( SIGKILL );
}
DevCon.Status( "Protected memory cleanup. Offset 0x%x", offset );
mmap_ClearCpuBlock( offset & ~m_pagemask );
}

View File

@ -96,8 +96,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add option="-DWX_PRECOMP" />
<Add directory="$(SvnRootDir)/common/include/" />
@ -387,7 +387,6 @@
<Unit filename="../vtlb.h" />
<Unit filename="../x86/BaseblockEx.cpp" />
<Unit filename="../x86/BaseblockEx.h" />
<Unit filename="../x86/aR3000A.S" />
<Unit filename="../x86/aVUzerorec.S" />
<Unit filename="../x86/aVif.S" />
<Unit filename="../x86/iCOP0.cpp" />
@ -420,7 +419,6 @@
<Option compiler="gcc" use="0" buildCommand="gcc $options -S $includes -c $file -o $object" />
</Unit>
<Unit filename="../x86/ir5900tables.cpp" />
<Unit filename="../x86/ix86-32/aR5900-32.S" />
<Unit filename="../x86/ix86-32/iCore-32.cpp" />
<Unit filename="../x86/ix86-32/iR5900-32.cpp" />
<Unit filename="../x86/ix86-32/iR5900Arit.cpp" />

View File

@ -79,7 +79,6 @@ struct MTGS_BufferedData
static __aligned(32) MTGS_BufferedData RingBuffer;
extern bool renderswitch;
static volatile bool gsIsOpened = false;
#ifdef RINGBUF_DEBUG_STACK
@ -98,6 +97,7 @@ mtgsThreadObject::mtgsThreadObject() :
, m_CopyCommandTally( 0 )
, m_CopyDataTally( 0 )
, m_RingBufferIsBusy( false )
, m_PluginOpened( false )
, m_QueuedFrames( 0 )
, m_packet_size( 0 )
, m_packet_ringpos( 0 )
@ -111,7 +111,7 @@ mtgsThreadObject::mtgsThreadObject() :
void mtgsThreadObject::OnStart()
{
gsIsOpened = false;
m_PluginOpened = false;
m_RingPos = 0;
m_WritePos = 0;
@ -187,14 +187,6 @@ struct PacketTagType
u32 data[3];
};
static void _clean_close_gs( void* obj )
{
if( !gsIsOpened ) return;
gsIsOpened = false;
if( g_plugins != NULL )
g_plugins->m_info[PluginId_GS].CommonBindings.Close();
}
static void dummyIrqCallback()
{
// dummy, because MTGS doesn't need this mess!
@ -203,7 +195,7 @@ static void dummyIrqCallback()
void mtgsThreadObject::OpenPlugin()
{
if( gsIsOpened ) return;
if( m_PluginOpened ) return;
memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
GSsetBaseMem( RingBuffer.Regs );
@ -225,7 +217,7 @@ void mtgsThreadObject::OpenPlugin()
throw Exception::PluginOpenError( PluginId_GS );
}
gsIsOpened = true;
m_PluginOpened = true;
m_sem_OpenDone.Post();
GSCSRr = 0x551B4000; // 0x55190000
@ -238,7 +230,6 @@ void mtgsThreadObject::ExecuteTaskInThread()
PacketTagType prevCmd;
#endif
pthread_cleanup_push( _clean_close_gs, this );
while( true )
{
m_sem_event.WaitRaw(); // ... because this does a cancel test itself..
@ -409,18 +400,34 @@ void mtgsThreadObject::ExecuteTaskInThread()
}
m_RingBufferIsBusy = false;
}
pthread_cleanup_pop( true );
}
void mtgsThreadObject::ClosePlugin()
{
if( !m_PluginOpened ) return;
m_PluginOpened = false;
if( g_plugins != NULL )
g_plugins->m_info[PluginId_GS].CommonBindings.Close();
}
void mtgsThreadObject::OnSuspendInThread()
{
_clean_close_gs( NULL );
ClosePlugin();
_parent::OnSuspendInThread();
}
void mtgsThreadObject::OnResumeInThread( bool isSuspended )
{
if( isSuspended )
OpenPlugin();
_parent::OnResumeInThread( isSuspended );
}
void mtgsThreadObject::OnCleanupInThread()
{
ClosePlugin();
_parent::OnCleanupInThread();
}
// Waits for the GS to empty out the entire ring buffer contents.
@ -783,7 +790,7 @@ void mtgsThreadObject::SendGameCRC( u32 crc )
void mtgsThreadObject::WaitForOpen()
{
if( gsIsOpened ) return;
if( m_PluginOpened ) return;
Resume();
// Two-phase timeout on MTGS opening, so that possible errors are handled
@ -798,11 +805,11 @@ void mtgsThreadObject::WaitForOpen()
if( !m_sem_OpenDone.Wait( wxTimeSpan(0, 0, 4, 0) ) )
{
RethrowException();
// Not opened yet, and no exceptions. Weird? You decide!
// TODO : implement a user confirmation to cancel the action and exit the
// emulator forcefully, or to continue waiting on the GS.
throw Exception::PluginOpenError( PluginId_GS, "The MTGS thread has become unresponsive while waiting for the GS plugin to open." );
}
}

View File

@ -26,11 +26,6 @@ extern "C"
void so_call(coroutine_t coro);
void so_resume(void);
void so_exit(void);
void recRecompile( u32 startpc );
// aR3000A.S
void iopRecRecompile(u32 startpc);
}
#ifdef __LINUX__
@ -40,18 +35,6 @@ extern "C"
// aVUzerorec.S
void* SuperVUGetProgram(u32 startpc, int vuindex);
void SuperVUCleanupProgram(u32 startpc, int vuindex);
void svudispfn();
// aR3000A.S
void iopJITCompile();
void iopJITCompileInBlock();
void iopDispatcherReg();
// aR5900-32.S
void JITCompile();
void JITCompileInBlock();
void DispatcherReg();
void DispatcherEvent();
}
#endif

View File

@ -70,7 +70,8 @@ void psxShutdown() {
//psxCpu->Shutdown();
}
void psxException(u32 code, u32 bd) {
void __fastcall psxException(u32 code, u32 bd)
{
// PSXCPU_LOG("psxException %x: %x, %x", code, psxHu32(0x1070), psxHu32(0x1074));
//Console.WriteLn("!! psxException %x: %x, %x", code, psxHu32(0x1070), psxHu32(0x1074));
// Set the Cause

View File

@ -194,11 +194,11 @@ extern R3000Acpu *psxCpu;
extern R3000Acpu psxInt;
extern R3000Acpu psxRec;
void psxReset();
void psxShutdown();
void psxException(u32 code, u32 step);
extern void psxReset();
extern void psxShutdown();
extern void __fastcall psxException(u32 code, u32 step);
extern void psxBranchTest();
void psxMemReset();
extern void psxMemReset();
// Subsets
extern void (*psxBSC[64])();

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -61,8 +61,9 @@ extern void SysClearExecutionCache(); // clears recompiled execution caches!
extern u8 *SysMmapEx(uptr base, u32 size, uptr bounds, const char *caller="Unnamed");
extern void vSyncDebugStuff( uint frame );
//////////////////////////////////////////////////////////////////////////////////////////
//
// --------------------------------------------------------------------------------------
// Memory Protection (Used by VTLB, Recompilers, and Texture caches)
// --------------------------------------------------------------------------------------
#ifdef __LINUX__
# include <signal.h>
@ -87,6 +88,33 @@ extern void vSyncDebugStuff( uint frame );
# error PCSX2 - Unsupported operating system platform.
#endif
// --------------------------------------------------------------------------------------
// PCSX2_SEH - Defines existence of "built in" Structured Exception Handling support.
// --------------------------------------------------------------------------------------
// This should be available on Windows, via Microsoft or Intel compilers (I'm pretty sure Intel
// supports native SEH model). GNUC in Windows, or any compiler in a non-windows platform, will
// need to use setjmp/longjmp instead to exit recompiled code.
//
#if defined(_WIN32) && !defined(__GNUC__)
# define PCSX2_SEH
#else
# include <setjmp.h>
// Platforms without SEH need to use SetJmp / LongJmp to deal with exiting the recompiled
// code execution pipelines in an efficient manner, since standard C++ exceptions cannot
// unwind across dynamically recompiled code.
enum
{
SetJmp_Dispatcher = 1,
SetJmp_Exit,
};
extern jmp_buf SetJmp_RecExecute;
extern jmp_buf SetJmp_StateCheck;
#endif
class pxMessageBoxEvent;
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -54,7 +54,7 @@ void SysThreadBase::Start()
RethrowException();
if( pxAssertDev( m_ExecMode == ExecMode_Closing, "Unexpected thread status during SysThread startup." ) )
{
throw Exception::ThreadCreationError(
throw Exception::ThreadCreationError(
wxsFormat( L"Timeout occurred while attempting to start the %s thread.", m_name.c_str() ),
wxEmptyString
);
@ -264,6 +264,9 @@ void SysThreadBase::OnCleanupInThread()
m_RunningLock.Unlock();
}
void SysThreadBase::OnSuspendInThread() {}
void SysThreadBase::OnResumeInThread( bool isSuspended ) {}
void SysThreadBase::StateCheckInThread( bool isCancelable )
{
switch( m_ExecMode )
@ -338,6 +341,13 @@ SysCoreThread::SysCoreThread() :
SysCoreThread::~SysCoreThread() throw()
{
SysCoreThread::Cancel();
}
extern bool CoreCancelDamnit;
void SysCoreThread::Cancel( bool isBlocking )
{
CoreCancelDamnit = true;
_parent::Cancel();
}
@ -345,6 +355,7 @@ void SysCoreThread::Start()
{
if( g_plugins == NULL ) return;
g_plugins->Init();
CoreCancelDamnit = false; // belongs in OnStart actually, but I'm tired :P
_parent::Start();
}

View File

@ -93,6 +93,12 @@ public:
return m_ExecMode > ExecMode_Closed;
}
bool HasPendingStateChangeRequest()
{
ExecutionMode mode = m_ExecMode;
return (mode == ExecMode_Closing) || (mode == ExecMode_Pausing);
}
bool IsClosed() const { return !IsOpen(); }
ExecutionMode GetExecutionMode() const { return m_ExecMode; }
@ -164,6 +170,7 @@ public:
virtual void ApplySettings( const Pcsx2Config& src );
virtual void OnResumeReady();
virtual void Reset();
virtual void Cancel( bool isBlocking=true );
bool HasValidState()
{

View File

@ -50,7 +50,7 @@ void VU0MI_XGKICK() {
void VU0MI_XTOP() {
}
void vu0ExecMicro(u32 addr) {
void __fastcall vu0ExecMicro(u32 addr) {
VUM_LOG("vu0ExecMicro %x", addr);
if(VU0.VI[REG_VPU_STAT].UL & 0x1) {

View File

@ -46,7 +46,7 @@ void vu1ResetRegs()
static int count;
void vu1ExecMicro(u32 addr)
void __fastcall vu1ExecMicro(u32 addr)
{
while(VU0.VI[REG_VPU_STAT].UL & 0x100)
{

View File

@ -119,14 +119,14 @@ extern void (*VU1regs_UPPER_FD_11_TABLE[32])(_VURegsNum *VUregsn);
// VU0
extern void vu0ResetRegs();
extern void vu0ExecMicro(u32 addr);
extern void __fastcall vu0ExecMicro(u32 addr);
extern void vu0Exec(VURegs* VU);
extern void vu0Finish();
extern void recResetVU0( void );
// VU1
extern void vu1ResetRegs();
extern void vu1ExecMicro(u32 addr);
extern void __fastcall vu1ExecMicro(u32 addr);
extern void vu1Exec(VURegs* VU);
void VU0_UPPER_FD_00();

View File

@ -74,6 +74,10 @@ static __threadlocal bool _reentrant_lock = false;
// via messages.
void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func, const wxChar *cond, const wxChar *msg )
{
// Used to allow the user to suppress future assertions during this application's session.
static bool disableAsserts = false;
if( disableAsserts ) return;
if( _reentrant_lock )
{
// Re-entrant assertions are bad mojo -- trap immediately.
@ -82,9 +86,6 @@ void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func
_reentrant_lock = true;
// Used to allow the user to suppress future assertions during this application's session.
static bool disableAsserts = false;
wxString dbgmsg;
dbgmsg.reserve( 2048 );

View File

@ -242,7 +242,7 @@ ConsoleLogFrame::ConsoleLogFrame( MainEmuFrame *parent, const wxString& title, A
{
m_TextCtrl.SetBackgroundColour( wxColor( 230, 235, 242 ) );
m_TextCtrl.SetDefaultStyle( m_ColorTable[DefaultConsoleColor] );
// create Log menu (contains most options)
wxMenuBar *pMenuBar = new wxMenuBar();
wxMenu& menuLog = *new wxMenu();
@ -312,9 +312,7 @@ int m_pendingFlushes = 0;
// and this one will magically follow suite. :)
void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
{
//#ifdef PCSX2_SEH
pthread_testcancel();
//#endif
ScopedLock lock( m_QueueLock );
@ -324,7 +322,7 @@ void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
}
if( (m_QueueColorSection.GetLength() == 0) || ((color != Color_Current) && (m_QueueColorSection.GetLast().color != color)) )
{
{
++m_CurQueuePos; // Don't overwrite the NULL;
m_QueueColorSection.Add( ColorSection(color, m_CurQueuePos) );
}
@ -333,10 +331,10 @@ void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
m_QueueBuffer.MakeRoomFor( endpos + 1 ); // and the null!!
memcpy_fast( &m_QueueBuffer[m_CurQueuePos], text.c_str(), sizeof(wxChar) * text.Length() );
m_CurQueuePos = endpos;
// this NULL may be overwritten if the next message sent doesn't perform a color change.
m_QueueBuffer[m_CurQueuePos] = 0;
// Idle events don't always pass (wx blocks them when moving windows or using menus, for
// example). So let's hackfix it so that an alternate message is posted if the queue is
// "piling up."
@ -355,7 +353,7 @@ void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
++m_WaitingThreadsForFlush;
lock.Unlock();
if( !m_sem_QueueFlushed.WaitRaw( wxTimeSpan( 0,0,0,500 ) ) )
if( !m_sem_QueueFlushed.Wait( wxTimeSpan( 0,0,0,500 ) ) )
{
// Necessary since the main thread could grab the lock and process before
// the above function actually returns (gotta love threading!)
@ -528,7 +526,7 @@ void ConsoleLogFrame::OnFlushEvent( wxCommandEvent& evt )
// the textctrl has focus or not. The wxWidgets AppendText() function uses EM_LINESCROLL
// instead, which tends to be much faster for high-volume logs, but also ends up refreshing
// the console in sloppy fashion for normal logging.
// (both are needed, the WM_VSCROLL makes the scrolling smooth, and the EM_LINESCROLL avoids
// weird errors when the buffer reaches "max" and starts clearing old history)

View File

@ -487,10 +487,6 @@
RelativePath="..\..\x86\aMicroVU.S"
>
</File>
<File
RelativePath="..\..\x86\aR3000A.S"
>
</File>
<File
RelativePath="..\..\x86\ix86-32\aR5900-32.S"
>
@ -896,14 +892,6 @@
RelativePath="..\..\x86\iCOP2.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.h"
>
</File>
<File
RelativePath="..\..\x86\iFPU.cpp"
>
@ -995,10 +983,6 @@
<Filter
Name="ix86-32"
>
<File
RelativePath="..\..\x86\ix86-32\iCore-32.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86-32\iR5900-32.cpp"
>
@ -1755,6 +1739,22 @@
>
</File>
</Filter>
<Filter
Name="iCore"
>
<File
RelativePath="..\..\x86\ix86-32\iCore-32.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.h"
>
</File>
</Filter>
</Filter>
<Filter
Name="Windows"

View File

@ -46,20 +46,33 @@ struct BASEBLOCKEX
class BaseBlocks
{
private:
protected:
typedef std::multimap<u32, uptr>::iterator linkiter_t;
// switch to a hash map later?
std::multimap<u32, uptr> links;
typedef std::multimap<u32, uptr>::iterator linkiter_t;
uptr recompiler;
std::vector<BASEBLOCKEX> blocks;
public:
BaseBlocks() :
recompiler( NULL )
, blocks(0)
{
blocks.reserve(0x4000);
}
BaseBlocks(uptr recompiler_) :
recompiler(recompiler_),
blocks(0)
{
blocks.reserve(0x4000);
}
void SetJITCompile( void (*recompiler_)() )
{
recompiler = (uptr)recompiler_;
}
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
int LastIndex (u32 startpc) const;

View File

@ -1,51 +0,0 @@
// iR3000a.c assembly routines
.intel_syntax noprefix
//////////////////////////////////////////////////////////////////////////
// Note that iR3000A.S and iR5900.S asm code is now identical. Only some
// function names and the following two defines should ever differ:
#define REGINFO psxRegs
#define RECLUT psxRecLUT
#define PCOFFSET 0x208 // this must always match what Pcsx2 displays at startup
//////////////////////////////////////////////////////////////////////////
// Preprocessor Mess!
.extern REGINFO
.extern RECLUT
.extern iopRecRecompile
//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
.global iopJITCompile
iopJITCompile:
mov esi, dword ptr [REGINFO + PCOFFSET]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [RECLUT+esi*4]
jmp dword ptr [ecx+ebx]
.global iopJITCompileInBlock
iopJITCompileInBlock:
jmp iopJITCompile
//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.
.globl iopDispatcherReg
iopDispatcherReg:
mov eax, dword ptr [REGINFO + PCOFFSET]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [RECLUT+eax*4]
jmp dword ptr [ecx+ebx]

View File

@ -21,7 +21,6 @@ SuperVUExecuteProgram:
add esp, 4
mov dword ptr [s_callstack], eax
call SuperVUGetProgram
mov s_vu1ebp, ebp
mov s_vu1esi, esi
mov s_vuedi, edi
mov s_vuebx, ebx
@ -38,7 +37,6 @@ SuperVUExecuteProgram:
SuperVUEndProgram:
// restore cpu state
ldmxcsr g_sseMXCSR
mov ebp, s_vu1ebp
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx

View File

@ -28,6 +28,7 @@
#include "iCOP0.h"
namespace Interp = R5900::Interpreter::OpcodeImpl::COP0;
using namespace x86Emitter;
namespace R5900 {
namespace Dynarec {
@ -163,12 +164,14 @@ void recMFC0( void )
break;
case 1:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32MtoR(EAX, (uptr)&cpuRegs.PERF.n.pcr0);
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV(eax, &cpuRegs.PERF.n.pcr0);
break;
case 3:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32MtoR(EAX, (uptr)&cpuRegs.PERF.n.pcr1);
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV(eax, &cpuRegs.PERF.n.pcr1);
break;
}
_deleteEEreg(_Rt_, 0);
@ -240,8 +243,8 @@ void recMTC0()
{
case 12:
iFlushCall(FLUSH_NODESTROY);
//_flushCachedRegs(); //NOTE: necessary?
_callFunctionArg1((uptr)WriteCP0Status, MEM_CONSTTAG, g_cpuConstRegs[_Rt_].UL[0]);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status );
break;
case 9:
@ -254,9 +257,10 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32ItoM((uptr)&cpuRegs.PERF.n.pccr, g_cpuConstRegs[_Rt_].UL[0]);
CALLFunc( (uptr)COP0_DiagnosticPCCR );
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR );
break;
case 1:
@ -288,8 +292,8 @@ void recMTC0()
{
case 12:
iFlushCall(FLUSH_NODESTROY);
//_flushCachedRegs(); //NOTE: necessary?
_callFunctionArg1((uptr)WriteCP0Status, MEM_GPRTAG|_Rt_, 0);
_eeMoveGPRtoR(ECX, _Rt_);
xCALL( WriteCP0Status );
break;
case 9:
@ -302,9 +306,10 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
CALLFunc( (uptr)COP0_UpdatePCCR );
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
CALLFunc( (uptr)COP0_DiagnosticPCCR );
xCALL( COP0_DiagnosticPCCR );
break;
case 1:

View File

@ -136,11 +136,13 @@ static void recCTC2(s32 info)
MOV16ItoM((uptr)&VU0.VI[REG_FBRST].UL,g_cpuConstRegs[_Rt_].UL[0]&0x0c0c);
break;
case REG_CMSAR1: // REG_CMSAR1
iFlushCall(FLUSH_NOCONST);// since CALLFunc
iFlushCall(FLUSH_NOCONST);
assert( _checkX86reg(X86TYPE_VI, REG_VPU_STAT, 0) < 0 &&
_checkX86reg(X86TYPE_VI, REG_TPC, 0) < 0 );
// Execute VU1 Micro SubRoutine
_callFunctionArg1((uptr)vu1ExecMicro, MEM_CONSTTAG, g_cpuConstRegs[_Rt_].UL[0]&0xffff);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0]&0xffff );
xCALL( vu1ExecMicro );
break;
default:
{
@ -191,10 +193,10 @@ static void recCTC2(s32 info)
AND32ItoR(EAX,0x0C0C);
MOV16RtoM((uptr)&VU0.VI[REG_FBRST].UL,EAX);
break;
case REG_CMSAR1: // REG_CMSAR1
case REG_CMSAR1: // REG_CMSAR1 (Execute VU1micro Subroutine)
iFlushCall(FLUSH_NOCONST);
_eeMoveGPRtoR(EAX, _Rt_);
_callFunctionArg1((uptr)vu1ExecMicro, MEM_X86TAG|EAX, 0); // Execute VU1 Micro SubRoutine
_eeMoveGPRtoR(ECX, _Rt_);
xCALL( vu1ExecMicro );
break;
default:
_eeMoveGPRtoM((uptr)&VU0.VI[_Fs_].UL,_Rt_);

View File

@ -187,8 +187,6 @@ u8 _hasFreeXMMreg();
void _freeXMMregs();
int _getNumXMMwrite();
// uses MEM_MMXTAG/MEM_XMMTAG to differentiate between the regs
void _recPushReg(int mmreg);
void _signExtendSFtoM(u32 mem);
// returns new index of reg, lower 32 bits already in mmx
@ -196,41 +194,8 @@ void _signExtendSFtoM(u32 mem);
// a negative shift is for sign extension
int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy); // returns true if reg destroyed
// Defines for passing register info
// only valid during writes. If write128, then upper 64bits are in an mmxreg
// (mmreg&0xf). Constant is used from gprreg ((mmreg>>16)&0x1f)
enum memtag
{
MEM_EECONSTTAG = 0x0100, // argument is a GPR and comes from g_cpuConstRegs
MEM_PSXCONSTTAG = 0x0200,
MEM_MEMORYTAG = 0x0400,
MEM_MMXTAG = 0x0800, // mmreg is mmxreg
MEM_XMMTAG = 0x8000, // mmreg is xmmreg
MEM_X86TAG = 0x4000, // ignored most of the time
MEM_GPRTAG = 0x2000, // argument is a GPR reg
MEM_CONSTTAG = 0x1000 // argument is a const
};
template<memtag tag> static __forceinline bool IS_REG(s32 reg)
{
return ((reg >= 0) && (reg & tag));
}
template<memtag tag> static __forceinline bool IS_REG(u32 reg)
{
return !!(reg & tag);
}
#define IS_EECONSTREG(reg) IS_REG<MEM_EECONSTTAG>(reg)
#define IS_PSXCONSTREG(reg) IS_REG<MEM_PSXCONSTTAG>(reg)
#define IS_MMXREG(reg) IS_REG<MEM_MMXTAG>(reg)
#define IS_XMMREG(reg) IS_REG<MEM_XMMTAG>(reg)
#define IS_X86REG(reg) IS_REG<MEM_X86TAG>(reg)
#define IS_GPRREG(reg) IS_REG<MEM_GPRTAG>(reg)
#define IS_CONSTREG(reg) IS_REG<MEM_CONSTTAG>(reg)
#define IS_MEMORYREG(reg) IS_REG<MEM_MEMORYTAG>(reg)
static const int MEM_MMXTAG = 0x0800; // mmreg is mmxreg
static const int MEM_XMMTAG = 0x8000; // mmreg is xmmreg
//////////////////////
// Instruction Info //
@ -425,12 +390,6 @@ extern u16 x86FpuState;
//////////////////////////////////////////////////////////////////////////
// Utility Functions -- that should probably be part of the Emitter.
// see MEM_X defines for argX format
extern void _callPushArg(u32 arg, uptr argmem); /// X86ARG is ignored for 32bit recs
extern void _callFunctionArg1(uptr fn, u32 arg1, uptr arg1mem);
extern void _callFunctionArg2(uptr fn, u32 arg1, u32 arg2, uptr arg1mem, uptr arg2mem);
extern void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr arg2mem, uptr arg3mem);
// Moves 128 bits of data using EAX/EDX (used by iCOP2 only currently)
extern void _recMove128MtoM(u32 to, u32 from);

View File

@ -58,52 +58,11 @@ uptr psxhwLUT[0x10000];
// R3000A statics
int psxreclog = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static __declspec(naked) void iopJITCompile()
{
__asm {
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
jmp dword ptr [ecx+ebx]
}
}
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
static __declspec(naked) void iopDispatcherReg()
{
__asm {
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BaseBlocks recBlocks((uptr)iopJITCompile);
static BaseBlocks recBlocks;
static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc
int psxbranch; // set for branch
@ -140,6 +99,277 @@ static u32 psxdump = 0;
(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \
psxRecClearMem(mem) : 4)
// =====================================================================================================
// Dynamically Compiled Dispatchers - R3000A style
// =====================================================================================================
static void __fastcall iopRecRecompile( const u32 startpc );
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned iopRecDispatchers[0x1000];
typedef void DynGenFunc();
static DynGenFunc* iopDispatcherEvent = NULL;
static DynGenFunc* iopDispatcherReg = NULL;
static DynGenFunc* iopJITCompile = NULL;
static DynGenFunc* iopJITCompileInBlock = NULL;
static DynGenFunc* iopEnterRecompiledCode = NULL;
static DynGenFunc* iopExitRecompiledCode = NULL;
// C-side event test invoked from the generated dispatcher code (iopDispatcherEvent
// emits a call to this function).  Runs the shared EE/IOP branch/event test.
static void recEventTest()
{
	// XMM/MMX register state must not be flagged as "saved" when we re-enter C code,
	// and the shared branch test must leave that state unchanged.
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
	_cpuBranchTest_Shared();
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
}
// parameters:
// espORebp - 0 for ESP, or 1 for EBP.
// regval - current value of the register at the time the fault was detected (predates the
// stackframe setup code in this function)
// Called from generated code (see _DynGen_StackFrameCheck) when ESP or EBP no longer
// matches the value recorded on entry to the recompiler.
//
// parameters:
//   espORebp - 0 for ESP, or 1 for EBP.
//   regval   - current value of the register at the time the fault was detected (predates the
//              stackframe setup code in this function)
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	// Resolve which register tripped the check, and the value we stashed for it on entry.
	const wxChar* regName  = (espORebp == 0) ? L"ESP" : L"EBP";
	const u32 storedValue  = (espORebp == 0) ? s_store_esp : s_store_ebp;

	pxFailDev( wxsFormat(
		L"(R3000A Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		regName, regval, storedValue )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
	// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
	// tinue to run with some degree of stability.
}
static void _DynGen_StackFrameCheck()
{
if( !IsDevBuild ) return;
// --------- EBP Here -----------
xCMP( ebp, &s_store_ebp );
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, &s_store_ebp ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, &s_store_esp );
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, &s_store_esp ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks.  It recompiles the current pc and then
// dispatches to the recompiled block address.
//
// Returns a pointer to the generated stub within iopRecDispatchers.
static DynGenFunc* _DynGen_JITCompile()
{
	// Fixed typo in the diagnostic message: "JITComple" -> "JITCompile".
	pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITCompile.  Thanks." );

	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	// __fastcall: the current pc is passed to iopRecRecompile via ECX.
	xMOV( ecx, &psxRegs.pc );
	xCALL( iopRecRecompile );

	// Look up the freshly-recompiled block through psxRecLUT (upper 16 bits of the pc
	// select the LUT page, lower 16 bits index into it) and jump to it.
	xMOV( eax, &psxRegs.pc );
	xMOV( ebx, eax );
	xSHR( eax, 16 );
	xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Stub used for cleared blocks referenced from within another block: simply
// forwards to the main iopJITCompile stub.
static DynGenFunc* _DynGen_JITCompileInBlock()
{
	u8* stubStart = xGetPtr();
	xJMP( iopJITCompile );
	return (DynGenFunc*)stubStart;
}
// called when jumping to variable pc address
static DynGenFunc* _DynGen_DispatcherReg()
{
	u8* stubStart = xGetPtr();
	_DynGen_StackFrameCheck();

	// Resolve the recompiled block for psxRegs.pc through the two-level psxRecLUT
	// (upper 16 bits pick the LUT page, lower 16 bits offset into it) and jump there.
	xMOV( eax, &psxRegs.pc );
	xMOV( ebx, eax );
	xSHR( eax, 16 );
	xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)stubStart;
}
// --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub!
// --------------------------------------------------------------------------------------
// In Release Builds this literally generates the following code:
// push edi
// push esi
// push ebx
// jmp DispatcherReg
// pop ebx
// pop esi
// pop edi
//
// See notes on why this works in both GCC (aligned stack!) and other compilers (not-so-
// aligned stack!). In debug/dev builds the code gen is more complicated, as it constructs
// ebp stackframe mess, which allows for a complete backtrace from debug breakpoints (yay).
//
// Also, if you set PCSX2_IOP_FORCED_ALIGN_STACK to 1, the codegen for MSVC becomes slightly
// more complicated since it has to perform a full stack alignment on entry.
//
#if defined(__GNUG__) || defined(__DARWIN__)
# define PCSX2_ASSUME_ALIGNED_STACK 1
#else
# define PCSX2_ASSUME_ALIGNED_STACK 0
#endif
// Set to 0 for a speedup in release builds.
// [doesn't apply to GCC/Mac, which must always align]
#define PCSX2_IOP_FORCED_ALIGN_STACK 0 //1
// For overriding stackframe generation options in Debug builds (possibly useful for troubleshooting)
// Typically this value should be the same as IsDevBuild.
static const bool GenerateStackFrame = IsDevBuild;
// Generates the entry stub used to transition from C code into the recompiled-code
// dispatchers, and (as a side effect) also generates the matching exit stub, storing
// it into iopExitRecompiledCode.  Returns the entry stub's address.
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
	u8* retval = xGetPtr();

	// A full frame is allocated when we want a debugger-friendly stackframe, or when
	// forced stack alignment is enabled.
	bool allocatedStack = GenerateStackFrame || PCSX2_IOP_FORCED_ALIGN_STACK;

	// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
	// allocating any room on the stack for it (which is important since the IOP's entry
	// code gets invoked quite a lot).

	if( allocatedStack )
	{
		// Standard prologue plus 16-byte alignment of ESP.  Callee-preserved registers
		// are stored at fixed EBP-relative slots (rather than pushed) so the epilogue
		// can restore them regardless of how much ESP was adjusted for alignment.
		xPUSH( ebp );
		xMOV( ebp, esp );
		xAND( esp, -0x10 );

		xSUB( esp, 0x20 );

		xMOV( ptr[ebp-12], edi );
		xMOV( ptr[ebp-8], esi );
		xMOV( ptr[ebp-4], ebx );
	}
	else
	{
		// GCC Compiler:
		// The frame pointer coming in from the EE's event test can be safely assumed to be
		// aligned, since GCC always aligns stackframes.  While handy in x86-64, where CALL + PUSH EBP
		// results in a neatly realigned stack on entry to every function, unfortunately in x86-32
		// this is usually worthless because CALL+PUSH leaves us 8 byte aligned instead (fail).  So
		// we have to do the usual set of stackframe alignments and simulated callstack mess
		// *regardless*.

		// MSVC/Intel compilers:
		// The PCSX2_IOP_FORCED_ALIGN_STACK setting is 0, so we don't care.  Just push regs like
		// the good old days!  (stack alignment will be indeterminate)

		xPUSH( edi );
		xPUSH( esi );
		xPUSH( ebx );

		allocatedStack = false;		// NOTE(review): redundant -- already false on this path.
	}

	// 'imm' will point at the placeholder return-address immediate emitted below, so it
	// can be patched to the real cleanup-code address once that address is known.
	uptr* imm = NULL;
	if( allocatedStack )
	{
		if( GenerateStackFrame )
		{
			// Simulate a CALL function by pushing the call address and EBP onto the stack.
			// This retains proper stacktrace and stack unwinding (handy in devbuilds!)

			xMOV( ptr32[esp+0x0c], 0xffeeff );	// 0xffeeff is a placeholder, patched via 'imm' below
			imm = (uptr*)(xGetPtr()-4);

			// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
			xMOV( ptr32[esp+0x08], ebp );
			xLEA( ebp, ptr32[esp+0x08] );
		}
	}

	if( IsDevBuild )
	{
		// Record ESP/EBP so the code emitted by _DynGen_StackFrameCheck can verify them.
		xMOV( &s_store_esp, esp );
		xMOV( &s_store_ebp, ebp );
	}

	xJMP( iopDispatcherReg );

	// Patch the placeholder "return address" to point at the cleanup code emitted next.
	if( imm != NULL )
		*imm = (uptr)xGetPtr();

	// ----------------------
	// ----> Cleanup! ---->

	iopExitRecompiledCode = (DynGenFunc*)xGetPtr();

	if( allocatedStack )
	{
		// pop the nested "simulated call" stackframe, if needed:
		if( GenerateStackFrame ) xLEAVE();
		xMOV( edi, ptr[ebp-12] );
		xMOV( esi, ptr[ebp-8] );
		xMOV( ebx, ptr[ebp-4] );
		xLEAVE();
	}
	else
	{
		xPOP( ebx );
		xPOP( esi );
		xPOP( edi );
	}

	xRET();

	return (DynGenFunc*)retval;
}
// Generates all the IOP dispatcher stubs into the page-aligned iopRecDispatchers
// buffer, then write-protects the buffer and hands the JITCompile stub to recBlocks.
// Note: DispatcherReg must be generated before JITCompile (the latter asserts on it).
static void _DynGen_Dispatchers()
{
	// In case init gets called multiple times:
	HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadWrite, false );

	// clear the buffer to 0xcc (easier debugging).
	memset_8<0xcc,0x1000>( iopRecDispatchers );

	xSetPtr( iopRecDispatchers );

	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
	// most and stand to benefit from strong alignment and direct referencing.
	// NOTE(review): the event stub emits only a call to recEventTest, so it appears to
	// rely on falling through into the DispatcherReg code generated immediately after --
	// confirm this is intended.
	iopDispatcherEvent = (DynGenFunc*)xGetPtr();
	xCALL( recEventTest );
	iopDispatcherReg	= _DynGen_DispatcherReg();

	iopJITCompile			= _DynGen_JITCompile();
	iopJITCompileInBlock	= _DynGen_JITCompileInBlock();
	iopEnterRecompiledCode	= _DynGen_EnterRecompiledCode();

	// Lock the buffer against accidental writes (stubs are regenerated via re-protect).
	HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadOnly, true );

	recBlocks.SetJITCompile( iopJITCompile );
}
////////////////////////////////////////////////////
using namespace R3000A;
#include "Utilities/AsciiFile.h"
@ -350,7 +580,10 @@ void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr)
void _psxFlushCall(int flushtype)
{
_freeX86regs();
// x86-32 ABI : These registers are not preserved across calls:
_freeX86reg( EAX );
_freeX86reg( ECX );
_freeX86reg( EDX );
if( flushtype & FLUSH_CACHED_REGS )
_psxFlushConstRegs();
@ -436,11 +669,6 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
_psxFlushCall(FLUSH_NODESTROY);
CALLFunc((uptr)zeroEx);
}
// Bios Call: Force the IOP to do a Branch Test ASAP.
// Important! This helps prevent game freeze-ups during boot-up and stage loads.
// Note: Fixes to cdvd have removed the need for this code.
//MOV32MtoR( EAX, (uptr)&psxRegs.cycle );
//MOV32RtoM( (uptr)&g_psxNextBranchCycle, EAX );
}
return;
}
@ -551,13 +779,14 @@ static void recAlloc()
throw Exception::OutOfMemory( "R3000a Init > Failed to allocate memory for pInstCache." );
ProfilerRegisterSource( "IOPRec", recMem, RECMEM_SIZE );
_DynGen_Dispatchers();
}
void recResetIOP()
{
// calling recResetIOP without first calling recInit is bad mojo.
jASSUME( recMem != NULL );
jASSUME( m_recBlockAlloc != NULL );
pxAssert( recMem != NULL );
pxAssert( m_recBlockAlloc != NULL );
DevCon.Status( "iR3000A Resetting recompiler memory and structures" );
@ -630,7 +859,7 @@ static void recExecute()
//for (;;) R3000AExecute();
}
static __forceinline s32 recExecuteBlock( s32 eeCycles )
static __noinline s32 recExecuteBlock( s32 eeCycles )
{
psxBreak = 0;
psxCycleEE = eeCycles;
@ -639,38 +868,23 @@ static __forceinline s32 recExecuteBlock( s32 eeCycles )
// The IOP does not use mmx/xmm registers, so we don't modify the status
// of the g_EEFreezeRegs here.
#ifdef _MSC_VER
__asm
{
push ebx
push esi
push edi
// [TODO] recExecuteBlock could be replaced by a direct call to the iopEnterRecompiledCode()
// (by assigning its address to the psxRec structure). But for that to happen, we need
// to move psxBreak/psxCycleEE update code to emitted assembly code. >_< --air
call iopDispatcherReg
// Likely Disasm, as borrowed from MSVC:
pop edi
pop esi
pop ebx
}
#else
__asm__ __volatile__
(
// We should be able to rely on GAS syntax (the register clobber list) as a
// replacement for manual push/pop of unpreserved registers.
// Entry:
// mov eax,dword ptr [esp+4]
// mov dword ptr [psxBreak (0E88DCCh)],0
// mov dword ptr [psxCycleEE (832A84h)],eax
".intel_syntax noprefix\n"
//"push ebx\n"
//"push esi\n"
//"push edi\n"
// Exit:
// mov ecx,dword ptr [psxBreak (0E88DCCh)]
// mov edx,dword ptr [psxCycleEE (832A84h)]
// lea eax,[edx+ecx]
"call iopDispatcherReg\n"
//"pop edi\n"
//"pop esi\n"
//"pop ebx\n"
".att_syntax\n"
: : : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory" );
#endif
iopEnterRecompiledCode();
return psxBreak + psxCycleEE;
}
@ -690,7 +904,7 @@ static __forceinline u32 psxRecClearMem(u32 pc)
u32 lowerextent = pc, upperextent = pc + 4;
int blockidx = recBlocks.Index(pc);
jASSUME(blockidx != -1);
pxAssert(blockidx != -1);
while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1]) {
if (pexblock->startpc + pexblock->size * 4 <= lowerextent)
@ -709,14 +923,14 @@ static __forceinline u32 psxRecClearMem(u32 pc)
recBlocks.Remove(blockidx);
}
#ifdef PCSX2_DEVBUILD
blockidx=0;
while(BASEBLOCKEX* pexblock = recBlocks[blockidx++])
{
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) {
Console.Error("Impossible block clearing failure");
jASSUME(0);
DevCon.Error("Impossible block clearing failure");
pxFailDev( "Impossible block clearing failure" );
}
#endif
}
iopClearRecLUT(PSX_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
@ -799,12 +1013,8 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
MOV32RtoM((uptr)&psxRegs.cycle, ECX); // update cycles
MOV32RtoM((uptr)&psxCycleEE, EAX);
j8Ptr[2] = JG8( 0 ); // jump if psxCycleEE > 0
RET(); // returns control to the EE
// Continue onward with branching here:
x86SetJ8( j8Ptr[2] );
// jump if psxCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
xJLE( iopExitRecompiledCode );
// check if an event is pending
SUB32MtoR(ECX, (uptr)&g_psxNextBranchCycle);
@ -846,7 +1056,9 @@ void rpsxSYSCALL()
MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
_callFunctionArg2((uptr)psxException, MEM_CONSTTAG, MEM_CONSTTAG, 0x20, psxbranch==1);
xMOV( ecx, 0x20 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
@ -867,7 +1079,9 @@ void rpsxBREAK()
MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
_callFunctionArg2((uptr)psxBREAK, MEM_CONSTTAG, MEM_CONSTTAG, 0x24, psxbranch==1);
xMOV( ecx, 0x24 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
@ -935,7 +1149,7 @@ static void printfn()
#endif
}
void iopRecRecompile(u32 startpc)
static void __fastcall iopRecRecompile( const u32 startpc )
{
u32 i;
u32 branchTo;

View File

@ -594,321 +594,23 @@ void rpsxDIVU_(int info) { rpsxDIVsuper(info, 0); }
PSXRECOMPILE_CONSTCODE3_PENALTY(DIVU, 1, psxInstCycles_Div);
//// LoadStores
#ifdef PCSX2_VIRTUAL_MEM
// VM load store functions (fastest)
//#define REC_SLOWREAD
//#define REC_SLOWWRITE
// VM-mode stub: the direct-memory (PCSX2_VIRTUAL_MEM) load/store path needs no
// x86 register preparation, so this always reports "no cached register" (0).
int _psxPrepareReg(int gprreg)
{
	return 0;
}
static u32 s_nAddMemOffset = 0;
// Binds the "hardware access" branch target for the JS8 jump emitted by
// rpsxSetMemLocation(): restores the un-shifted address in ECX and re-applies
// any deferred memory offset before the hardware read/write handler is called.
static __forceinline void SET_HWLOC_R3000A() {
	x86SetJ8(j8Ptr[0]);	// target of the sign-check branch from rpsxSetMemLocation
	SHR32ItoR(ECX, 3);	// undo the <<3 done during the sign check
	if( s_nAddMemOffset ) ADD32ItoR(ECX, s_nAddMemOffset);	// re-apply deferred offset, if any
}
// Emits code that computes the effective PSX address (GPR[regs] + Imm) into ECX,
// masked down to the 2MB RAM mirror. The SHL/JS8/SHR sequence branches (via
// j8Ptr[0], later bound by SET_HWLOC_R3000A) when the high address bits are set,
// i.e. when the access must go through the hardware handlers instead of RAM.
// Always returns 1 (callers use the result as their "emit hardware fixup" flag).
int rpsxSetMemLocation(int regs, int mmreg)
{
	s_nAddMemOffset = 0;	// no deferred offset by default
	MOV32MtoR( ECX, (int)&psxRegs.GPR.r[ regs ] );
	if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ );
	SHL32ItoR(ECX, 3);	// shifts the top 3 address bits into the sign bit
	j8Ptr[0] = JS8(0);	// taken => hardware-mapped address (see SET_HWLOC_R3000A)
	SHR32ItoR(ECX, 3);
	AND32ItoR(ECX, 0x1fffff); // 2Mb
	return 1;
}
// Emits x86 code for the PSX 8/16/32-bit load instructions (LB/LBU/LH/LHU/LW).
//
// Parameters:
//   bit  - access width in bits (8, 16, or 32)
//   sign - nonzero to sign-extend the loaded value, zero to zero-extend
//
// When the base register (Rs) is a known constant the read is resolved at
// recompile time via psxRecMemConstRead*; otherwise runtime address calculation
// is emitted: a direct read from the PSX RAM mapping for normal addresses, with
// a branch into psxRecMemRead* for hardware-mapped addresses. The result is
// stored to GPR[Rt] unless Rt is r0 (loads to r0 are discarded).
void recLoad32(u32 bit, u32 sign)
{
	int mmreg = -1;

#ifdef REC_SLOWREAD
	_psxFlushConstReg(_Rs_);
#else
	if( PSX_IS_CONST1( _Rs_ ) ) {
		// do const processing
		int ineax = 0;

		_psxOnWriteReg(_Rt_);
		mmreg = EAX;
		switch(bit) {
			case 8: ineax = psxRecMemConstRead8(mmreg, g_psxConstRegs[_Rs_]+_Imm_, sign); break;
			case 16:
				// 16-bit accesses must be halfword-aligned
				assert( (g_psxConstRegs[_Rs_]+_Imm_) % 2 == 0 );
				ineax = psxRecMemConstRead16(mmreg, g_psxConstRegs[_Rs_]+_Imm_, sign);
				break;
			case 32:
				// 32-bit accesses must be word-aligned
				assert( (g_psxConstRegs[_Rs_]+_Imm_) % 4 == 0 );
				ineax = psxRecMemConstRead32(mmreg, g_psxConstRegs[_Rs_]+_Imm_);
				break;
		}

		if( _Rt_ ) MOV32RtoM( (int)&psxRegs.GPR.r[ _Rt_ ], EAX );
	}
	else
#endif
	{
		int dohw;
		int mmregs = _psxPrepareReg(_Rs_);

		_psxOnWriteReg(_Rt_);
		_psxDeleteReg(_Rt_, 0);

		// Effective address -> ECX; nonzero return means the hardware-access
		// fixup path (SET_HWLOC_R3000A) must be emitted below.
		dohw = rpsxSetMemLocation(_Rs_, mmregs);

		// Fast path: read straight out of the PSX RAM mapping.
		switch(bit) {
			case 8:
				if( sign ) MOVSX32Rm8toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				else MOVZX32Rm8toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
			case 16:
				if( sign ) MOVSX32Rm16toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				else MOVZX32Rm16toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
			case 32:
				MOV32RmtoROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
		}

		if( dohw ) {
			// Fast path skips over the hardware handler...
			j8Ptr[1] = JMP8(0);
			// ...which is entered via the branch bound here:
			SET_HWLOC_R3000A();

			switch(bit) {
				case 8:
					CALLFunc( (int)psxRecMemRead8 );
					if( sign ) MOVSX32R8toR(EAX, EAX);
					else MOVZX32R8toR(EAX, EAX);
					break;
				case 16:
					CALLFunc( (int)psxRecMemRead16 );
					if( sign ) MOVSX32R16toR(EAX, EAX);
					else MOVZX32R16toR(EAX, EAX);
					break;
				case 32:
					CALLFunc( (int)psxRecMemRead32 );
					break;
			}

			x86SetJ8(j8Ptr[1]);
		}

		if( _Rt_ )
			MOV32RtoM( (int)&psxRegs.GPR.r[ _Rt_ ], EAX );
	}
}
// Thin dispatch stubs for the MIPS load family: each forwards to the shared
// loader with its access width (bits) and signedness flag (1 = sign-extend).
void rpsxLB()
{
	recLoad32( 8, 1 );
}

void rpsxLBU()
{
	recLoad32( 8, 0 );
}

void rpsxLH()
{
	recLoad32( 16, 1 );
}

void rpsxLHU()
{
	recLoad32( 16, 0 );
}

void rpsxLW()
{
	recLoad32( 32, 0 );
}
extern void rpsxMemConstClear(u32 mem);
// check if mem is executable, and clear it
// Checks whether the address in ECX maps to recompiled (executable) IOP code,
// and if so invalidates the affected block(s) via psxRecClearMem.
// Naked custom-convention helper: address arrives in ECX, EDX/EAX are clobbered.
__declspec(naked) void rpsxWriteMemClear()
{
	_asm {
		// edx = &psxRecLUT[mem >> 16]  (the >>14 / and 0xfc combo scales the
		// 16-bit page index into a dword-sized table offset)
		mov edx, ecx
		shr edx, 14
		and dl, 0xfc
		add edx, psxRecLUT
		test dword ptr [edx], 0xffffffff
		jnz Clear32		// nonzero LUT entry => page contains recompiled code
		ret

Clear32:
		// recLUT[mem>>16] + (mem&0xfffc)
		mov edx, dword ptr [edx]
		mov eax, ecx
		and eax, 0xfffc
		// edx += 2*eax
		shl eax, 1
		add edx, eax
		cmp dword ptr [edx], 0
		je ClearRet		// no block registered at this address

		// Manual cdecl call: psxRecClearMem(blockptr)
		sub esp, 4
		mov dword ptr [esp], edx
		call psxRecClearMem
		add esp, 4
ClearRet:
		ret
	}
}
extern u32 s_psxBlockCycles;
// Emits x86 code for the PSX 8/16/32-bit store instructions (SB/SH/SW).
//
//   bit - access width in bits (8, 16, or 32)
//
// Constant-address stores are resolved at recompile time via
// psxRecMemConstWrite*. Otherwise runtime code is emitted which writes
// directly into the PSX RAM mapping (guarded by g_psxWriteOk), invalidates
// recompiled code covering the written address (rpsxWriteMemClear), and
// branches to psxRecMemWrite* for hardware-mapped addresses.
void recStore(int bit)
{
#ifdef REC_SLOWWRITE
	_psxFlushConstReg(_Rs_);
#else
	if( PSX_IS_CONST1( _Rs_ ) ) {
		u8* pjmpok;
		u32 addr = g_psxConstRegs[_Rs_]+_Imm_;
		int doclear = 0;

		// Addresses with bit 28 set bypass the g_psxWriteOk guard entirely.
		if( !(addr & 0x10000000) ) {
			// check g_psxWriteOk
			CMP32ItoM((uptr)&g_psxWriteOk, 0);
			pjmpok = JE8(0);	// writes disabled -> skip the store
		}

		switch(bit) {
			case 8:
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite8(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite8(addr, EAX);
				}
				break;
			case 16:
				// halfword stores must be 2-byte aligned
				assert( (addr)%2 == 0 );
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite16(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite16(addr, EAX);
				}
				break;
			case 32:
				// word stores must be 4-byte aligned
				assert( (addr)%4 == 0 );
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite32(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite32(addr, EAX);
				}
				break;
		}

		if( !(addr & 0x10000000) ) {
			// 'doclear' signals that recompiled code may live at the target word.
			if( doclear ) rpsxMemConstClear((addr)&~3);
			x86SetJ8(pjmpok);
		}
	}
	else
#endif
	{
		int dohw;
		int mmregs = _psxPrepareReg(_Rs_);
		dohw = rpsxSetMemLocation(_Rs_, mmregs);	// effective address -> ECX

		// Skip the whole store if writes are currently disabled.
		CMP32ItoM((uptr)&g_psxWriteOk, 0);
		u8* pjmpok = JE8(0);

		// Fast path: write straight into the PSX RAM mapping.
		if( PSX_IS_CONST1( _Rt_ ) ) {
			switch(bit) {
				case 8: MOV8ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
				case 16: MOV16ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
				case 32: MOV32ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
			}
		}
		else {
			switch(bit) {
				case 8:
					MOV8MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV8RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
				case 16:
					MOV16MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV16RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
				case 32:
					MOV32MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV32RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
			}
		}

		// Invalidate recompiled code at the written address, but only when it
		// falls below the high-water mark of recompiled memory.
		if( s_nAddMemOffset ) ADD32ItoR(ECX, s_nAddMemOffset);
		CMP32MtoR(ECX, (uptr)&g_psxMaxRecMem);
		j8Ptr[1] = JAE8(0);

		if( bit < 32 ) AND8ItoR(ECX, 0xfc);	// align sub-word stores to the containing word
		CALLFunc((u32)rpsxWriteMemClear);

		if( dohw ) {
			// Fast path skips the hardware handler...
			j8Ptr[2] = JMP8(0);
			// ...entered via the branch bound here:
			SET_HWLOC_R3000A();

			if( PSX_IS_CONST1(_Rt_) ) {
				switch(bit) {
					case 8: MOV8ItoR(EAX, g_psxConstRegs[_Rt_]); break;
					case 16: MOV16ItoR(EAX, g_psxConstRegs[_Rt_]); break;
					case 32: MOV32ItoR(EAX, g_psxConstRegs[_Rt_]); break;
				}
			}
			else {
				switch(bit) {
					case 8: MOV8MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
					case 16: MOV16MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
					case 32: MOV32MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
				}
			}

			if( s_nAddMemOffset != 0 ) ADD32ItoR(ECX, s_nAddMemOffset);

			// some type of hardware write
			switch(bit) {
				case 8: CALLFunc( (int)psxRecMemWrite8 ); break;
				case 16: CALLFunc( (int)psxRecMemWrite16 ); break;
				case 32: CALLFunc( (int)psxRecMemWrite32 ); break;
			}

			x86SetJ8(j8Ptr[2]);
		}

		x86SetJ8(j8Ptr[1]);
		x86SetJ8(pjmpok);
	}
}
// Thin dispatch stubs for the MIPS store family; the argument is the access
// width in bits.
void rpsxSB()
{
	recStore( 8 );
}

void rpsxSH()
{
	recStore( 16 );
}

void rpsxSW()
{
	recStore( 32 );
}
REC_FUNC(LWL);
REC_FUNC(LWR);
REC_FUNC(SWL);
REC_FUNC(SWR);
#else
// TLB loadstore functions
REC_FUNC(LWL);
REC_FUNC(LWR);
REC_FUNC(SWL);
REC_FUNC(SWR);
using namespace x86Emitter;
static void rpsxLB()
{
_psxDeleteReg(_Rs_, 1);
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead8, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
if (_Rt_) {
MOVSX32R8toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -922,9 +624,9 @@ static void rpsxLBU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead8, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
if (_Rt_) {
MOVZX32R8toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -938,9 +640,9 @@ static void rpsxLH()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead16, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
if (_Rt_) {
MOVSX32R16toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -954,9 +656,9 @@ static void rpsxLHU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead16, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
if (_Rt_) {
MOVZX32R16toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -971,13 +673,13 @@ static void rpsxLW()
_psxDeleteReg(_Rt_, 0);
_psxFlushCall(FLUSH_EVERYTHING);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
TEST32ItoR(X86ARG1, 0x10000000);
TEST32ItoR(ECX, 0x10000000);
j8Ptr[0] = JZ8(0);
_callFunctionArg1((uptr)iopMemRead32, X86ARG1|MEM_X86TAG, 0);
xCALL( iopMemRead32 ); // returns value in EAX
if (_Rt_) {
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
}
@ -985,11 +687,11 @@ static void rpsxLW()
x86SetJ8(j8Ptr[0]);
// read from psM directly
AND32ItoR(X86ARG1, 0x1fffff);
ADD32ItoR(X86ARG1, (uptr)psxM);
AND32ItoR(ECX, 0x1fffff);
ADD32ItoR(ECX, (uptr)psxM);
MOV32RmtoR( X86ARG1, X86ARG1 );
MOV32RtoM( (uptr)&psxRegs.GPR.r[_Rt_], X86ARG1);
MOV32RmtoR( ECX, ECX );
MOV32RtoM( (uptr)&psxRegs.GPR.r[_Rt_], ECX);
x86SetJ8(j8Ptr[1]);
PSX_DEL_CONST(_Rt_);
@ -1000,9 +702,10 @@ static void rpsxSB()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite8, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite8 );
}
static void rpsxSH()
@ -1010,9 +713,10 @@ static void rpsxSH()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite16, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite16 );
}
static void rpsxSW()
@ -1020,13 +724,12 @@ static void rpsxSW()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite32, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite32 );
}
#endif // end load store
//// SLL
void rpsxSLL_const()
{

View File

@ -1,57 +0,0 @@
// iR5900.c assembly routines
// Hand-written dispatcher stubs for the EE recompiler (GAS, Intel syntax).
// These labels are entered by jumps from recompiled code; none of them set
// up a conventional C stackframe.
.intel_syntax noprefix

//////////////////////////////////////////////////////////////////////////
// Note that iR3000A.S and iR5900.S asm code is now identical. Only some
// function names and the following two defines should ever differ:
#define REGINFO cpuRegs
#define RECLUT recLUT
#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup

//////////////////////////////////////////////////////////////////////////
// Preprocessor Mess!
.extern REGINFO
.extern RECLUT
.extern recRecompile
.extern recEventTest

//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
.global JITCompile
JITCompile:
	mov esi, dword ptr [REGINFO + PCOFFSET]	// esi = current pc
	push esi
	call recRecompile			// recRecompile(pc), cdecl: arg on stack
	add esp, 4
	mov ebx, esi				// ebx = pc low bits (offset into page table)
	shr esi, 16
	mov ecx, dword ptr [RECLUT+esi*4]	// ecx = recLUT[pc >> 16]
	jmp dword ptr [ecx+ebx]			// jump to the freshly compiled block

.global JITCompileInBlock
JITCompileInBlock:
	jmp JITCompile

//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.
.globl DispatcherReg
DispatcherReg:
	mov eax, dword ptr [REGINFO + PCOFFSET]	// eax = current pc
	mov ebx, eax
	shr eax, 16
	mov ecx, dword ptr [RECLUT+eax*4]	// ecx = recLUT[pc >> 16]
	jmp dword ptr [ecx+ebx]			// dispatch to block for current pc

.globl DispatcherEvent
DispatcherEvent:
	call recEventTest			// run pending events, then re-dispatch
	jmp DispatcherReg

View File

@ -236,7 +236,8 @@ void _flushConstRegs()
int _allocX86reg(int x86reg, int type, int reg, int mode)
{
int i;
assert( reg >= 0 && reg < 32 );
pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." );
pxAssertDev( x86reg != ESP && x86reg != EBP, "Allocation of ESP/EBP is not allowed!" );
// don't alloc EAX and ESP,EBP if MODE_NOFRAME
int oldmode = mode;
@ -448,14 +449,10 @@ void _freeX86reg(int x86reg)
x86regs[x86reg].inuse = 0;
}
void _freeX86regs() {
int i;
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse) continue;
void _freeX86regs()
{
for (int i=0; i<iREGCNT_GPR; i++)
_freeX86reg(i);
}
}
// MMX Caching
@ -863,88 +860,6 @@ void SetFPUstate() {
}
}
// Emits code that pushes one 32-bit argument onto the native stack ahead of a
// CALLFunc (cdecl-style, caller cleanup — see _callFunctionArg*).
//
// 'arg' is a tagged operand descriptor; its IS_* classification selects where
// the value comes from: a live x86 register, a compile-time constant, a cached
// GPR/XMM/MMX register, an EE/IOP constant register, or memory. 'argmem' holds
// the immediate value / memory address for the const and memory forms.
__forceinline void _callPushArg(u32 arg, uptr argmem)
{
	if( IS_X86REG(arg) ) {
		// value already lives in an x86 register
		PUSH32R(arg&0xff);
	}
	else if( IS_CONSTREG(arg) ) {
		// immediate constant supplied via argmem
		PUSH32I(argmem);
	}
	else if( IS_GPRREG(arg) ) {
		// cached EE GPR: materialize it directly into the stack slot
		SUB32ItoR(ESP, 4);
		_eeMoveGPRtoRm(ESP, arg&0xff);
	}
	else if( IS_XMMREG(arg) ) {
		// low dword of an XMM register
		SUB32ItoR(ESP, 4);
		SSEX_MOVD_XMM_to_Rm(ESP, arg&0xf);
	}
	else if( IS_MMXREG(arg) ) {
		// low dword of an MMX register
		SUB32ItoR(ESP, 4);
		MOVD32MMXtoRm(ESP, arg&0xf);
	}
	else if( IS_EECONSTREG(arg) ) {
		// EE constant-propagated register (index in bits 16..20)
		PUSH32I(g_cpuConstRegs[(arg>>16)&0x1f].UL[0]);
	}
	else if( IS_PSXCONSTREG(arg) ) {
		// IOP constant-propagated register (index in bits 16..20)
		PUSH32I(g_psxConstRegs[(arg>>16)&0x1f]);
	}
	else if( IS_MEMORYREG(arg) ) {
		// value resides in memory at argmem
		PUSH32M(argmem);
	}
	else {
		assert( (arg&0xfff0) == 0 );
		// assume it is a GPR reg
		PUSH32R(arg&0xf);
	}
}
// Emits a call to 'fn' with one 32-bit stack argument (arg1 is a tagged
// descriptor decoded by _callPushArg; arg1mem supplies the immediate/address
// for the const and memory forms). Caller cleans up the stack after the call.
__forceinline void _callFunctionArg1(uptr fn, u32 arg1, uptr arg1mem)
{
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 4);	// pop the 4 bytes of argument space
}
// Emits a call to 'fn' with two 32-bit stack arguments. Arguments are pushed
// right-to-left (arg2 first) so arg1 ends up nearest the return address, as
// expected by cdecl callees. Caller cleans up the 8 bytes after the call.
__forceinline void _callFunctionArg2(uptr fn, u32 arg1, u32 arg2, uptr arg1mem, uptr arg2mem)
{
	_callPushArg(arg2, arg2mem);
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 8);	// pop both argument slots
}
// Emits a call to 'fn' with three 32-bit stack arguments, pushed right-to-left
// (arg3, arg2, arg1). Caller cleans up the 12 bytes after the call.
__forceinline void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr arg2mem, uptr arg3mem)
{
	_callPushArg(arg3, arg3mem);
	_callPushArg(arg2, arg2mem);
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 12);	// pop all three argument slots
}
// Emits code that pushes the 32-bit value described by the tagged operand
// 'mmreg' onto the native stack: XMM/MMX low dword, an EE/IOP constant
// register, or (fallback) a plain x86 register.
void _recPushReg(int mmreg)
{
	if( IS_XMMREG(mmreg) ) {
		// push the low dword of an XMM register
		SUB32ItoR(ESP, 4);
		SSEX_MOVD_XMM_to_Rm(ESP, mmreg&0xf);
	}
	else if( IS_MMXREG(mmreg) ) {
		// push the low dword of an MMX register
		SUB32ItoR(ESP, 4);
		MOVD32MMXtoRm(ESP, mmreg&0xf);
	}
	else if( IS_EECONSTREG(mmreg) ) {
		// EE constant-propagated register (index in bits 16..20)
		PUSH32I(g_cpuConstRegs[(mmreg>>16)&0x1f].UL[0]);
	}
	else if( IS_PSXCONSTREG(mmreg) ) {
		// IOP constant-propagated register (index in bits 16..20)
		PUSH32I(g_psxConstRegs[(mmreg>>16)&0x1f]);
	}
	else {
		assert( (mmreg&0xfff0) == 0 );
		// plain x86 register index
		PUSH32R(mmreg);
	}
}
void _signExtendSFtoM(u32 mem)
{
LAHF();

View File

@ -79,8 +79,7 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static u32 *recRAMCopy = NULL;
void JITCompile();
static BaseBlocks recBlocks((uptr)JITCompile);
static BaseBlocks recBlocks;
static u8* recPtr = NULL;
static u32 *recConstBufPtr = NULL;
EEINST* s_pInstCache = NULL;
@ -310,6 +309,200 @@ u32* recGetImm64(u32 hi, u32 lo)
return imm64;
}
// =====================================================================================================
// R5900 Dispatchers
// =====================================================================================================
static void __fastcall recRecompile( const u32 startpc );
static u32 g_lastpc = 0;
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned eeRecDispatchers[0x1000];
typedef void DynGenFunc();
static DynGenFunc* DispatcherEvent = NULL;
static DynGenFunc* DispatcherReg = NULL;
static DynGenFunc* JITCompile = NULL;
static DynGenFunc* JITCompileInBlock = NULL;
static DynGenFunc* EnterRecompiledCode = NULL;
static DynGenFunc* ExitRecompiledCode = NULL;
// Event-test entry point invoked from the generated DispatcherEvent stub
// (see _DynGen_Dispatchers). Runs the shared EE branch/event test.
static void recEventTest()
{
	// No xmm/mmx registers may be in a "frozen" (saved) state while events run.
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
	_cpuBranchTest_Shared();
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
}
// parameters:
// espORebp - 0 for ESP, or 1 for EBP.
// regval - current value of the register at the time the fault was detected (predates the
// stackframe setup code in this function)
// Dev-build assertion handler called from generated code (see
// _DynGen_StackFrameCheck) when ESP or EBP no longer matches the value
// captured on entry to the recompiler.
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	pxFailDev( wxsFormat( L"(R5900 Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
	// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
	// tinue to run with some degree of stability.
}
// Emits a dev-build-only sanity check comparing the live ESP/EBP against the
// values saved by EnterRecompiledCode (s_store_esp / s_store_ebp). On mismatch
// it calls StackFrameCheckFailed (__fastcall: id in ecx, register value in edx)
// and then attempts to restore the saved value. No code is emitted in release.
static void _DynGen_StackFrameCheck()
{
	if( !IsDevBuild ) return;

	// --------- EBP Here -----------

	xCMP( ebp, &s_store_ebp );
	xForwardJE8 skipassert_ebp;

	xMOV( ecx, 1 );				// 1 specifies EBP
	xMOV( edx, ebp );
	xCALL( StackFrameCheckFailed );
	xMOV( ebp, &s_store_ebp );		// half-hearted frame recovery attempt!

	skipassert_ebp.SetTarget();

	// --------- ESP There -----------

	xCMP( esp, &s_store_esp );
	xForwardJE8 skipassert_esp;

	xXOR( ecx, ecx );			// 0 specifies ESP
	xMOV( edx, esp );
	xCALL( StackFrameCheckFailed );
	xMOV( esp, &s_store_esp );		// half-hearted frame recovery attempt!

	skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
// Generates the JITCompile handler: recompiles the block at the current
// cpuRegs.pc, then dispatches into it through the recLUT page table.
// Fix: corrected the "JITComple" typo in the assertion message.
static DynGenFunc* _DynGen_JITCompile()
{
	pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITCompile. Thanks." );

	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	xMOV( ecx, &cpuRegs.pc );		// ecx = pc (__fastcall first argument)
	xCALL( recRecompile );

	// Re-read the pc and jump through recLUT[pc>>16] + low pc bits:
	xMOV( eax, &cpuRegs.pc );
	xMOV( ebx, eax );
	xSHR( eax, 16 );
	xMOV( ecx, ptr[recLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Generates a trivial stub whose only job is to jump to the JITCompile handler.
static DynGenFunc* _DynGen_JITCompileInBlock()
{
	u8* const entryPoint = xGetPtr();
	xJMP( JITCompile );
	return (DynGenFunc*)entryPoint;
}
// called when jumping to variable pc address
// Generates the register dispatcher: jumps to the recompiled block for the
// current cpuRegs.pc by indexing recLUT[pc>>16] with the low pc bits.
// Used whenever recompiled code branches to a variable target address.
static DynGenFunc* _DynGen_DispatcherReg()
{
	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	xMOV( eax, &cpuRegs.pc );
	xMOV( ebx, eax );			// ebx = low pc bits (offset within LUT page)
	xSHR( eax, 16 );
	xMOV( ecx, ptr[recLUT + (eax*4)] );	// ecx = recLUT[pc >> 16]
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Generates EnterRecompiledCode(), the C-callable entry into the recompiler:
// aligns the native stack to 16 bytes, preserves edi/esi/ebx, fabricates a
// fake call frame whose "return address" is patched to point at the code that
// follows, and jumps into DispatcherReg. The code after the jump becomes
// ExitRecompiledCode, which unwinds both frames and returns to the caller.
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
	u8* retval = xGetPtr();

	// "standard" frame pointer setup for aligned stack: Record the original
	// esp into ebp, and then align esp.  ebp references the original esp base
	// for the duration of our function, and is used to restore the original
	// esp before returning from the function

	// Optimization: We "allocate" 0x10 bytes of stack ahead of time here, which we can
	// use for supplying parameters to cdecl functions.

	xPUSH( ebp );
	xMOV( ebp, esp );
	xAND( esp, -0x10 );

	// First 0x10 is for esi, edi, etc. Second 0x10 is for the return address and ebp.  The
	// third 0x10 is an optimization for C-style CDECL calls we might make from the recompiler
	// (parameters for those calls can be stored there!)

	xSUB( esp, 0x30 );

	// Save the callee-preserved registers relative to the pre-alignment frame:
	xMOV( ptr[ebp-12], edi );
	xMOV( ptr[ebp-8], esi );
	xMOV( ptr[ebp-4], ebx );

	// Simulate a CALL function by pushing the call address and EBP onto the stack.
	// 0xffeeff is only a placeholder; 'imm' aliases the 4-byte immediate just
	// emitted and is patched below with the real ExitRecompiledCode address.
	xMOV( ptr32[esp+0x1c], 0xffeeff );
	uptr& imm = *(uptr*)(xGetPtr()-4);

	// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
	xMOV( ptr32[esp+0x18], ebp );
	xLEA( ebp, ptr32[esp+0x18] );

	// Record the frame for the dev-build stackframe sanity checks:
	xMOV( &s_store_esp, esp );
	xMOV( &s_store_ebp, ebp );

	xJMP( ptr32[&DispatcherReg] );

	// Patch the fake return address: execution re-enters here (ExitRecompiledCode).
	imm = (uptr)xGetPtr();
	ExitRecompiledCode = (DynGenFunc*)xGetPtr();

	xLEAVE();	// unwind the simulated inner frame first...

	xMOV( edi, ptr[ebp-12] );
	xMOV( esi, ptr[ebp-8] );
	xMOV( ebx, ptr[ebp-4] );

	xLEAVE();	// ...then the real one (restores the caller's esp/ebp)
	xRET();

	return (DynGenFunc*)retval;
}
// Builds all EE recompiler dispatcher stubs into the page-aligned
// eeRecDispatchers buffer and write-protects the result. Safe to call more
// than once (the buffer is re-opened for writing first).
static void _DynGen_Dispatchers()
{
	// In case init gets called multiple times:
	HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false );

	// clear the buffer to 0xcc (easier debugging).
	memset_8<0xcc,0x1000>( eeRecDispatchers );

	xSetPtr( eeRecDispatchers );

	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
	// most and stand to benefit from strong alignment and direct referencing.
	DispatcherEvent = (DynGenFunc*)xGetPtr();
	xCALL( recEventTest );		// DispatcherEvent falls through into DispatcherReg:
	DispatcherReg	= _DynGen_DispatcherReg();

	JITCompile			= _DynGen_JITCompile();
	JITCompileInBlock	= _DynGen_JITCompileInBlock();
	EnterRecompiledCode	= _DynGen_EnterRecompiledCode();

	// Lock the buffer against accidental writes from here on:
	HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true );

	recBlocks.SetJITCompile( JITCompile );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
static const int REC_CACHEMEM = 0x01000000;
@ -377,6 +570,7 @@ static void recAlloc()
// No errors.. Proceed with initialization:
ProfilerRegisterSource( "EERec", recMem, REC_CACHEMEM+0x1000 );
_DynGen_Dispatchers();
x86FpuState = FPU_STATE;
}
@ -479,141 +673,85 @@ void recStep( void )
{
}
extern "C"
#ifndef PCSX2_SEH
// <--- setjmp/longjmp model <---
#include "GS.h"
#include "System/SysThreads.h"
static void StateThreadCheck_LongJmp()
{
void recEventTest()
{
#ifdef PCSX2_DEVBUILD
// don't remove this check unless doing an official release
if( g_globalXMMSaved || g_globalMMXSaved)
{
DevCon.Error("PCSX2 Foopah! Frozen regs have not been restored!!!");
DevCon.Error("g_globalXMMSaved = %d,g_globalMMXSaved = %d", g_globalXMMSaved, g_globalMMXSaved);
}
assert( !g_globalXMMSaved && !g_globalMMXSaved);
#endif
setjmp( SetJmp_StateCheck );
// Perform counters, interrupts, and IOP updates:
_cpuBranchTest_Shared();
int oldstate;
#ifdef PCSX2_DEVBUILD
assert( !g_globalXMMSaved && !g_globalMMXSaved);
#endif
// Important! Most of the console logging and such has cancel points in it. This is great
// in Windows, where SEH lets us safely kill a thread from anywhere we want. This is bad
// in Linux, which cannot have a C++ exception cross the recompiler. Hence the changing
// of the cancelstate here!
pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, &oldstate );
mtgsThread.RethrowException();
SysCoreThread::Get().StateCheckInThread();
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
}
}
////////////////////////////////////////////////////
static u32 g_lastpc = 0;
#ifdef _MSC_VER
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
// MSVC-only naked dispatcher: recompiles the block at cpuRegs.pc, then jumps
// into it via recLUT. No prologue/epilogue — entered by jmp from recompiled
// code, never by a normal call.
static __naked void JITCompile()
{
	__asm {
		mov esi, dword ptr [cpuRegs.pc]		// esi = current pc
		push esi
		call recRecompile			// recRecompile(pc), cdecl arg on stack
		add esp, 4

		mov ebx, esi				// ebx = low pc bits
		shr esi, 16
		mov ecx, dword ptr [recLUT+esi*4]	// ecx = recLUT[pc >> 16]
		jmp dword ptr [ecx+ebx]			// enter the freshly compiled block
	}
}
// Naked stub that simply forwards to JITCompile.
static __naked void JITCompileInBlock()
{
	__asm {
		jmp JITCompile
	}
}
// called when jumping to variable pc address
// called when jumping to variable pc address: naked dispatcher that jumps to
// the recompiled block for the current cpuRegs.pc via the recLUT page table.
static void __naked DispatcherReg()
{
	__asm {
		mov eax, dword ptr [cpuRegs.pc]		// eax = current pc
		mov ebx, eax				// ebx = low pc bits
		shr eax, 16
		mov ecx, dword ptr [recLUT+eax*4]	// ecx = recLUT[pc >> 16]
		jmp dword ptr [ecx+ebx]
	}
}
// [TODO] : Replace these functions with x86Emitter-generated code and we can compound this
// function and DispatcherReg() into a fast fall-through case (removes the DispatcherReg jump
// in this function, since execution will just fall right into the DispatcherReg implementation).
//
// Naked event dispatcher: runs the event test, then re-enters the normal
// register dispatcher.
static void __naked DispatcherEvent()
{
	__asm
	{
		call recEventTest;
		jmp DispatcherReg;
	}
}
#endif
static void recExecute()
{
// Implementation Notes:
// This function enter an endless loop, which is only escapable via C++ exception handling.
// The loop is needed because some things in the rec use "ret" as a shortcut to
// invoking DispatcherReg. These things are code bits which are called infrequently,
// such as dyna_block_discard and dyna_page_reset.
StateThreadCheck_LongJmp();
try
switch( setjmp( SetJmp_RecExecute ) )
{
while( true )
{
// Note: make sure the FreezeRegs boolean is reset to true here, because
// it might be set to false, depending on if the rec exits from the context of
// an EventTest or not.
case SetJmp_Exit: break;
case 0:
case SetJmp_Dispatcher:
// Typically the Dispatcher is invoked from the EventTest code, which clears
// the FreezeRegs flag, so always be sure to reset it here:
g_EEFreezeRegs = true;
try
{
#ifdef _MSC_VER
__asm
{
push ebx
push esi
push edi
call DispatcherReg
pop edi
pop esi
pop ebx
}
#else // _MSC_VER
DispatcherReg();
#endif
}
catch( Exception::ForceDispatcherReg& )
{
}
}
}
catch( Exception::ExitRecExecute& )
{
while( true )
EnterRecompiledCode();
break;
}
g_EEFreezeRegs = false;
}
namespace R5900 {
namespace Dynarec {
namespace OpcodeImpl {
#else
// ---> SEH Model --->
// Exception-model main loop for the EE recompiler (PCSX2_SEH builds).
// Repeatedly re-enters the generated code; C++ exceptions provide the
// non-local exits: ForceDispatcherReg restarts the loop iteration, and
// ExitRecExecute leaves the recompiler entirely.
static void recExecute()
{
	// Implementation Notes:
	// [TODO] fix this comment to explain various code entry/exit points, when I'm not so tired!

	try
	{
		while( true )
		{
			// Typically the Dispatcher is invoked from the EventTest code, which clears
			// the FreezeRegs flag, so always be sure to reset it here:
			g_EEFreezeRegs = true;
			try {
				EnterRecompiledCode();
			}
			catch( Exception::ForceDispatcherReg& ) { }
		}
	}
	catch( Exception::ExitRecExecute& ) { }

	g_EEFreezeRegs = false;
}
#endif
////////////////////////////////////////////////////
void recSYSCALL( void ) {
void R5900::Dynarec::OpcodeImpl::recSYSCALL( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_NODESTROY);
@ -622,13 +760,14 @@ void recSYSCALL( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
xJMP( DispatcherReg );
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
////////////////////////////////////////////////////
void recBREAK( void ) {
void R5900::Dynarec::OpcodeImpl::recBREAK( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
@ -637,13 +776,11 @@ void recBREAK( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
RET();
xJMP( DispatcherEvent );
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
} } } // end namespace R5900::Dynarec::OpcodeImpl
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count)
{
@ -703,26 +840,34 @@ void recClear(u32 addr, u32 size)
upperextent = min(upperextent, ceiling);
#ifdef PCSX2_DEVBUILD
for (int i = 0; pexblock = recBlocks[i]; i++) {
if (s_pCurBlock == PC_GETBLOCK(pexblock->startpc))
continue;
u32 blockend = pexblock->startpc + pexblock->size * 4;
if (pexblock->startpc >= addr && pexblock->startpc < addr + size * 4
|| pexblock->startpc < addr && blockend > addr) {
Console.Error( "Impossible block clearing failure" );
pxFail( "Impossible block clearing failure" );
DevCon.Error( "Impossible block clearing failure" );
pxFailDev( "Impossible block clearing failure" );
}
}
#endif
if (upperextent > lowerextent)
ClearRecLUT(PC_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
}
#ifndef PCSX2_SEH
jmp_buf SetJmp_RecExecute;
jmp_buf SetJmp_StateCheck;
#endif
// Forces an exit from the recompiled-code execution loop: throws a C++
// exception under the SEH model, or longjmps back to recExecute's setjmp
// with the SetJmp_Exit code otherwise.
static void ExitRec()
{
#ifdef PCSX2_SEH
	throw Exception::ExitRecExecute();
#else
	longjmp( SetJmp_RecExecute, SetJmp_Exit );
#endif
}
// check for end of bios
@ -730,11 +875,22 @@ void CheckForBIOSEnd()
{
xMOV( eax, &cpuRegs.pc );
xCMP( eax, 0x00200008 );
xJE( ExitRec );
/*xCMP( eax, 0x00200008 );
xJE(ExitRec);
xCMP( eax, 0x00100008 );
xJE( ExitRec );
xJE(ExitRec);*/
xCMP( eax, 0x00200008 );
xForwardJE8 CallExitRec;
xCMP( eax, 0x00100008 );
xForwardJNE8 SkipExitRec;
CallExitRec.SetTarget();
xCALL( ExitRec );
SkipExitRec.SetTarget();
}
static int *s_pCode;
@ -833,7 +989,10 @@ void LoadBranchState()
void iFlushCall(int flushtype)
{
_freeX86regs();
// Free registers that are not saved across function calls (x86-32 ABI):
_freeX86reg(EAX);
_freeX86reg(ECX);
_freeX86reg(EDX);
if( flushtype & FLUSH_FREE_XMM )
_freeXMMregs();
@ -934,6 +1093,8 @@ static u32 eeScaleBlockCycles()
// setting "branch = 2";
static void iBranchTest(u32 newpc)
{
_DynGen_StackFrameCheck();
if( g_ExecBiosHack ) CheckForBIOSEnd();
// Check the Event scheduler if our "cycle target" has been reached.
@ -1136,8 +1297,9 @@ static void printfn()
static int curcount = 0;
const int skip = 0;
assert( !g_globalMMXSaved );
assert( !g_globalXMMSaved );
pxAssert( !g_globalMMXSaved && !g_globalXMMSaved );
//pxAssert( cpuRegs.pc != 0x80001300 );
if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
curcount++;
@ -1151,18 +1313,22 @@ static void printfn()
}
}
u32 s_recblocks[] = {0};
// Diagnostic trap for a bad ESP value: logs the error and asserts.
// (NOTE(review): callers are not visible in this chunk — presumably invoked
// from generated code's stack sanity checks; confirm before relying on it.)
void badespfn() {
	Console.Error("Bad esp!");
	assert(0);
}
static u32 s_recblocks[] = {0};
// Called when a block under manual protection fails it's pre-execution integrity check.
// Invalidates the recompiled block(s) covering [start, start + sz*4) and then
// bails out of recompiled code by jumping to ExitRecompiledCode rather than
// returning normally (this code is reached via a direct jmp, not a call).
void __fastcall dyna_block_discard(u32 start,u32 sz)
{
	DevCon.WriteLn("dyna_block_discard .. start=0x%08X size=%d", start, sz*4);
	recClear(start, sz);

	// Stack trick: This function was invoked via a direct jmp, so manually pop the
	// EBP/stackframe before issuing a RET, else esp/ebp will be incorrect.

#ifdef _MSC_VER
	__asm leave __asm jmp [ExitRecompiledCode]
#else
	__asm__ __volatile__( "leave\n jmp *%[exitRec]\n" : : [exitRec] "m" (ExitRecompiledCode) : );
#endif
}
// called when a block under manual protection has been run enough times to be a
@ -1172,9 +1338,15 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
recClear(start & ~0xfffUL, 0x400);
manual_counter[start >> 12]++;
mmap_MarkCountedRamPage( start );
#ifdef _MSC_VER
__asm leave __asm jmp [ExitRecompiledCode]
#else
__asm__ __volatile__( "leave\n jmp *%[exitRec]\n" : : [exitRec] "m" (ExitRecompiledCode) : );
#endif
}
void recRecompile( const u32 startpc )
static void __fastcall recRecompile( const u32 startpc )
{
u32 i = 0;
u32 branchTo;

View File

@ -98,7 +98,7 @@ void recWritebackHILO(int info, int writed, int upper)
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE|MODE_READ);
if( regd >= 0 ) {
SSE_MOVLPS_M64_to_XMM(regd, loaddr);
regd |= 0x8000;
regd |= MEM_XMMTAG;
}
}
}

View File

@ -19,6 +19,8 @@
#include "iR5900.h"
#include "R5900OpcodeTables.h"
using namespace x86Emitter;
extern void _vu0WaitMicro();
extern void _vu0FinishMicro();
@ -311,14 +313,12 @@ static void recCTC2() {
}
else MOV32ItoM((uptr)&microVU0.regs->VI[_Rd_].UL, 0);
break;
case REG_CMSAR1:
case REG_CMSAR1: // Execute VU1 Micro SubRoutine
if (_Rt_) {
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
PUSH32R(EAX);
MOV32MtoR(ECX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
}
else PUSH32I(0);
CALLFunc((uptr)vu1ExecMicro); // Execute VU1 Micro SubRoutine
ADD32ItoR(ESP, 4);
else XOR32RtoR(ECX,ECX);
xCALL(vu1ExecMicro);
break;
case REG_FBRST:
if (!_Rt_) {

View File

@ -1,6 +1,6 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
@ -57,7 +57,7 @@ extern void iDumpVU1Registers();
#define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on...
// use x86reg caching (faster) (not really. rather lots slower :p (rama) )
// use x86reg caching (faster) (not really. rather lots slower :p (rama) )
// ... and buggy too since we disabled EBP. Causes GoW2 to hang. Let's get rid of it,
// sVU is only here to serve as a regression model for Nan/INF behavior anyway. (--air)
//#define SUPERVU_X86CACHING
@ -65,7 +65,7 @@ extern void iDumpVU1Registers();
// registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise, causes SPS in Ratchet and clank (Nneeve) )
#ifndef PCSX2_DEBUG
//#define SUPERVU_INTERCACHING
//#define SUPERVU_INTERCACHING
#endif
#define SUPERVU_CHECKCONDITION 0 // has to be 0!!
@ -75,8 +75,8 @@ extern void iDumpVU1Registers();
#define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) )
#define _UImm11_ (s32)(vucode & 0x7ff)
#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register
#define _It_ (_Ft_ & 15)
#define _Is_ (_Fs_ & 15)
@ -351,7 +351,7 @@ void SuperVUAlloc(int vuindex)
// upper 4 bits must be zero!
// Changed "first try base" to 0xf1e0000, since 0x0c000000 liked to fail a lot. (cottonvibes)
s_recVUMem = SysMmapEx(0xf1e0000, VU_EXESIZE, 0x10000000, "SuperVUAlloc");
if (s_recVUMem == NULL)
{
throw Exception::OutOfMemory(
@ -382,27 +382,27 @@ void SuperVUAlloc(int vuindex)
// Deletes every cached VuFunctionHeader for the given VU index / cache slot j,
// then empties the tracking list.  (Fixed: the list was cleared twice; a single
// clear() after the delete loop is sufficient.)
void DestroyCachedHeaders(int vuindex, int j)
{
	list<VuFunctionHeader*>::iterator it = s_plistCachedHeaders[vuindex][j].begin();

	// Free each header before dropping the pointers from the list.
	while (it != s_plistCachedHeaders[vuindex][j].end())
	{
		delete *it;
		it++;
	}

	s_plistCachedHeaders[vuindex][j].clear();
}
void DestroyVUHeaders(int vuindex)
{
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
while (it != s_listVUHeaders[vuindex].end())
{
delete *it;
it++;
}
s_listVUHeaders[vuindex].clear();
s_listVUHeaders[vuindex].clear();
}
// destroy VU resources
@ -595,7 +595,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
{
eff.Printf( "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ',
(*itblock)->startpc, (*itblock)->endpc - 8);
for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
{
eff.Printf("%x ", (*itchild)->startpc);
@ -643,9 +643,9 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
eff.Printf( "STR: ");
for (i = 0; i < iREGCNT_GPR; ++i)
{
if (pregs[i].inuse)
if (pregs[i].inuse)
eff.Printf( "%.2d ", pregs[i].reg);
else
else
eff.Printf( "-1 ");
}
eff.Printf( "\n");
@ -657,9 +657,9 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
pregs = &s_vecRegArray[(*itblock)->nEndx86];
for (i = 0; i < iREGCNT_GPR; ++i)
{
if (pregs[i].inuse)
if (pregs[i].inuse)
eff.Printf( "%.2d ", pregs[i].reg);
else
else
eff.Printf( "-1 ");
}
eff.Printf( "\n");
@ -687,7 +687,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
}
}
//
//
#if 0 // __LINUX__
// dump the asm
@ -756,9 +756,9 @@ void* SuperVUGetProgram(u32 startpc, int vuindex)
assert(s_TotalVUCycles > 0);
if (vuindex)
VU1.VI[REG_TPC].UL = startpc;
else
else
VU0.VI[REG_TPC].UL = startpc;
return (void*)SuperVUEndProgram;
}
@ -2156,16 +2156,16 @@ void VuBaseBlock::AssignVFRegs()
itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1;
itinst->vfflush[i] = -1;
if (regs->VFread0)
if (regs->VFread0)
itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0);
else if (regs->VIread & (1 << REG_VF0_FLAG))
else if (regs->VIread & (1 << REG_VF0_FLAG))
itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
if (regs->VFread1)
if (regs->VFread1)
itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0);
else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff)
else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff)
itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
int reusereg = -1; // 0 - VFwrite, 1 - VFAcc
@ -2224,15 +2224,15 @@ void VuBaseBlock::AssignVFRegs()
{
if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread0)) _freeXMMreg(itinst->vfread0[i]);
xmmregs[itinst->vfread0[i]].inuse = 1;
xmmregs[itinst->vfread0[i]].reg = reg;
xmmregs[itinst->vfread0[i]].type = type;
xmmregs[itinst->vfread0[i]].mode = 0;
if (reusereg)
itinst->vfacc[i] = itinst->vfread0[i];
else
else
itinst->vfwrite[i] = itinst->vfread0[i];
}
else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] &&
@ -2240,21 +2240,21 @@ void VuBaseBlock::AssignVFRegs()
{
if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread1)) _freeXMMreg(itinst->vfread1[i]);
xmmregs[itinst->vfread1[i]].inuse = 1;
xmmregs[itinst->vfread1[i]].reg = reg;
xmmregs[itinst->vfread1[i]].type = type;
xmmregs[itinst->vfread1[i]].mode = 0;
if (reusereg)
if (reusereg)
itinst->vfacc[i] = itinst->vfread1[i];
else
else
itinst->vfwrite[i] = itinst->vfread1[i];
}
else
{
if (reusereg)
if (reusereg)
itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
else
else
itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
}
}
@ -2276,7 +2276,7 @@ void VuBaseBlock::AssignVFRegs()
{
// CLIP inst, need two extra regs
if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1);
free1 = _allocTempXMMreg(XMMT_FPS, -1);
free2 = _allocTempXMMreg(XMMT_FPS, -1);
_freeXMMreg(free1);
@ -2361,12 +2361,12 @@ void VuBaseBlock::AssignVIRegs(int parent)
if (parents.size() > 0)
{
u32 usedvars2 = 0xffffffff;
for(itparent = parents.begin(); itparent != parents.end(); itparent++)
{
usedvars2 &= (*itparent)->insts.front().usedvars[0];
}
usedvars |= usedvars2;
}
@ -2413,7 +2413,7 @@ void VuBaseBlock::AssignVIRegs(int parent)
s_markov.children.push_back(this);
type |= BLOCKTYPE_ANALYZED;
for(itparent = parents.begin(); itparent != parents.end(); itparent++)
{
(*itparent)->AssignVIRegs(1);
@ -2628,17 +2628,11 @@ __declspec(naked) static void SuperVUEndProgram()
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx
}
#ifdef PCSX2_DEBUG
__asm
{
sub s_vu1esp, esp
}
#endif
__asm
{
call SuperVUCleanupProgram
jmp s_callstack // so returns correctly
}
@ -2723,9 +2717,9 @@ static void SuperVURecompile()
{
(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
}
s_listBlocks.front()->Recompile();
// make sure everything compiled
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
@ -2761,7 +2755,7 @@ static void SuperVURecompile()
JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
}
// only other case is when there are two branches
else
else
{
assert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH);
}
@ -2776,7 +2770,7 @@ static void SuperVURecompile()
(*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff);
*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4);
}
else
else
{
*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode;
}
@ -2860,7 +2854,7 @@ void SuperVUFreeXMMregs(u32* livevars)
SSE_MOVHPS_XMM_to_M64(addr, (x86SSERegType)i);
SSE_SHUFPS_M128_to_XMM((x86SSERegType)i, addr, 0xc4);
}
else
else
{
SSE_MOVHPS_M64_to_XMM((x86SSERegType)i, addr + 8);
}
@ -2907,7 +2901,7 @@ void VuBaseBlock::Recompile()
MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc);
MOV32ItoM((uptr)&VU->VI[REG_TPC], startpc);
MOV32ItoM((uptr)&s_svulast, startpc);
list<VuBaseBlock*>::iterator itparent;
for (itparent = parents.begin(); itparent != parents.end(); ++itparent)
{
@ -3023,9 +3017,9 @@ void VuBaseBlock::Recompile()
_freeX86regs();
AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag
AND32ItoM((uptr)&VU->vifRegs->stat, ~VIF1_STAT_VEW);
if (!branch) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc);
JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
}
else
@ -3111,7 +3105,7 @@ void VuBaseBlock::Recompile()
else
x86regs[i].inuse = 0;
}
else
else
#endif
{
_freeX86reg(i);
@ -3427,7 +3421,7 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
// else
// MOV32MtoR(EAX, (uptr)&VU->VI[REG_STATUS_FLAG]);
// s_StatusRead = tempstatus;
if (s_StatusRead == 0)
s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG];
@ -3630,7 +3624,8 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x400 : 0x004);
u8* ptr = JZ8(0);
OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x200 : 0x002);
_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( hwIntcIrq );
x86SetJ8(ptr);
}
if (ptr[1] & 0x08000000) // T flag
@ -3638,7 +3633,8 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x800 : 0x008);
u8* ptr = JZ8(0);
OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x400 : 0x004);
_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( hwIntcIrq );
x86SetJ8(ptr);
}
@ -3785,7 +3781,7 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
_freeX86reg(x86temp);
}
// waitq
if (ptr[0] == 0x800003bf) SuperVUFlush(0, 1);
// waitp
@ -4357,7 +4353,7 @@ void recVUMI_XGKICK(VURegs *VU, int info)
recVUMI_XGKICK_(VU);
}
int isreg = _allocX86reg(X86ARG2, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
int isreg = _allocX86reg(ECX, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
_freeX86reg(isreg); // flush
x86regs[isreg].inuse = 1;
x86regs[isreg].type = X86TYPE_VITEMP;
@ -4366,12 +4362,12 @@ void recVUMI_XGKICK(VURegs *VU, int info)
SHL32ItoR(isreg, 4);
AND32ItoR(isreg, 0x3fff);
s_XGKICKReg = isreg;
if (!SUPERVU_XGKICKDELAY || pc == s_pCurBlock->endpc) {
recVUMI_XGKICK_(VU);
}
else {
s_ScheduleXGKICK = (CHECK_XGKICKHACK) ? (min((u32)4, (s_pCurBlock->endpc-pc)/8)) : 2;
s_ScheduleXGKICK = (CHECK_XGKICKHACK) ? (min((u32)4, (s_pCurBlock->endpc-pc)/8)) : 2;
}
}

View File

@ -241,7 +241,7 @@ static void CvtPacketToFloat( StereoOut32* srcdest )
// Parameter note: Size should always be a multiple of 128, thanks!
static void CvtPacketToInt( StereoOut32* srcdest, uint size )
{
jASSUME( (size & 127) == 0 );
//jASSUME( (size & 127) == 0 );
const StereoOutFloat* src = (StereoOutFloat*)srcdest;
StereoOut32* dest = srcdest;