Reintegrated 'aligned_stack' branch. General summary [interesting to Devs only, really]:

* EEcore recompiler aligns the stack on entry for all platforms.
 * IOP recompiler aligns stack for GCC/Mac by default (can be force-enabled for all platforms via compiler define)
 * Added setjmp/longjmp to the EEcore recompiler, used by GCC to exit the recompiler in efficient form (Win32 platforms use SEH).
 * aR3000a.S and aR5900.S removed and replaced with x86Emitter generated dispatchers.
* All C functions called from recompiled code use __fastcall (simple, fast, and retains stack alignment in a neat fashion)


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2054 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-10-21 21:16:07 +00:00
commit e4330ee0ee
47 changed files with 1042 additions and 1107 deletions

View File

@ -66,8 +66,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add directory="../../include/Utilities" />
<Add directory="../../include" />

View File

@ -67,8 +67,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add directory="../../include/x86emitter" />
<Add directory="../../include" />

View File

@ -38,7 +38,7 @@
Console.Error( ex.what() ); \
}
#ifdef __GNUC__
#ifdef __GNUG__
# define DESTRUCTOR_CATCHALL __DESTRUCTOR_CATCHALL( __PRETTY_FUNCTION__ )
#else
# define DESTRUCTOR_CATCHALL __DESTRUCTOR_CATCHALL( __FUNCTION__ )

View File

@ -146,6 +146,7 @@ namespace Threading
void WaitRaw();
bool WaitRaw( const wxTimeSpan& timeout );
void WaitNoCancel();
void WaitNoCancel( const wxTimeSpan& timeout );
int Count();
void Wait();

View File

@ -18,6 +18,17 @@
// Implementations found here: CALL and JMP! (unconditional only)
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
#ifdef __GNUG__
// GCC has a bug that causes the templated function handler for Jmp/Call emitters to generate
// bad asm code. (error is something like "7#*_uber_379s_mangled_$&02_name is already defined!")
// Using GCC's always_inline attribute fixes it. This differs from __forceinline in that it
// inlines *even in debug builds* which is (usually) undesirable.
// ... except when it avoids compiler bugs.
# define __always_inline_tmpl_fail __attribute__((always_inline))
#else
# define __always_inline_tmpl_fail
#endif
// ------------------------------------------------------------------------
template< bool isJmp >
class xImpl_JmpCall
@ -33,8 +44,8 @@ public:
// Special form for calling functions. This form automatically resolves the
// correct displacement based on the size of the instruction being generated.
template< typename T >
__forceinline void operator()( T* func ) const
template< typename T > __forceinline __always_inline_tmpl_fail
void operator()( T* func ) const
{
if( isJmp )
xJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func, false ); // double cast to/from (uptr) needed to appease GCC

View File

@ -188,6 +188,7 @@ namespace x86Emitter
// ----- Miscellaneous Instructions -----
// Various Instructions with no parameter and no special encoding logic.
extern void xLEAVE();
extern void xRET();
extern void xCBW();
extern void xCWD();

View File

@ -31,11 +31,6 @@ typedef int x86IntRegType;
#define EBP 5
#define ESP 4
#define X86ARG1 EAX
#define X86ARG2 ECX
#define X86ARG3 EDX
#define X86ARG4 EBX
#define MM0 0
#define MM1 1
#define MM2 2

View File

@ -163,6 +163,14 @@ void Threading::Semaphore::WaitNoCancel()
pthread_setcancelstate( oldstate, NULL );
}
// Timed wait on the semaphore with pthread cancellation temporarily disabled,
// mirroring the untimed WaitNoCancel() overload declared alongside it.  The
// calling thread cannot be cancelled while blocked inside WaitRaw(), and its
// previous cancelability state is restored before returning.
void Threading::Semaphore::WaitNoCancel( const wxTimeSpan& timeout )
{
int oldstate;
// Disable cancellation so a pending pthread_cancel() cannot interrupt the wait.
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
// NOTE(review): the bool result of the timed WaitRaw (timeout indicator) is
// discarded here, so callers cannot distinguish a timeout from a normal wake.
WaitRaw( timeout );
// Restore the caller's original cancelability state.
pthread_setcancelstate( oldstate, NULL );
}
int Threading::Semaphore::Count()
{
int retval;

View File

@ -212,7 +212,9 @@ void Threading::PersistentThread::Block()
bool Threading::PersistentThread::IsSelf() const
{
return pthread_self() == m_thread;
// Detached threads may have their pthread handles recycled as newer threads, causing
// false IsSelf reports.
return !m_detached && (pthread_self() == m_thread);
}
bool Threading::PersistentThread::IsRunning() const
@ -387,10 +389,10 @@ void Threading::PersistentThread::OnStartInThread()
void Threading::PersistentThread::_internal_execute()
{
m_lock_InThread.Lock();
_DoSetThreadName( m_name );
OnStartInThread();
_DoSetThreadName( m_name );
_try_virtual_invoke( &PersistentThread::ExecuteTaskInThread );
}
@ -409,7 +411,7 @@ void Threading::PersistentThread::OnCleanupInThread()
// callback function
void* Threading::PersistentThread::_internal_callback( void* itsme )
{
jASSUME( itsme != NULL );
pxAssert( itsme != NULL );
PersistentThread& owner = *((PersistentThread*)itsme);
pthread_cleanup_push( _pt_callback_cleanup, itsme );

View File

@ -729,6 +729,7 @@ __forceinline void xPOPFD() { xWrite8( 0x9D ); }
//////////////////////////////////////////////////////////////////////////////////////////
//
__forceinline void xLEAVE() { xWrite8( 0xC9 ); }
__forceinline void xRET() { xWrite8( 0xC3 ); }
__forceinline void xCBW() { xWrite16( 0x9866 ); }
__forceinline void xCWD() { xWrite8( 0x98 ); }

View File

@ -38,7 +38,7 @@ __releaseinline void UpdateCP0Status() {
cpuTestHwInts();
}
void WriteCP0Status(u32 value) {
// Writes the EE COP0 Status register and re-evaluates CPU state that depends
// on it (via UpdateCP0Status, which also runs cpuTestHwInts per the hunk above).
// __fastcall: called directly from recompiled code, which in this commit uses
// register-based argument passing to preserve stack alignment.
void __fastcall WriteCP0Status(u32 value) {
cpuRegs.CP0.n.Status.val = value;
UpdateCP0Status();
}
@ -221,7 +221,7 @@ __forceinline void COP0_UpdatePCCR()
//if( cpuRegs.CP0.n.Status.b.ERL || !cpuRegs.PERF.n.pccr.b.CTE ) return;
// TODO : Implement memory mode checks here (kernel/super/user)
// For now we just assume user mode.
// For now we just assume kernel mode.
if( cpuRegs.PERF.n.pccr.val & 0xf )
{

View File

@ -16,7 +16,7 @@
#ifndef __COP0_H__
#define __COP0_H__
extern void WriteCP0Status(u32 value);
extern void __fastcall WriteCP0Status(u32 value);
extern void UpdateCP0Status();
extern void WriteTLB(int i);
extern void UnmapTLB(int i);

View File

@ -440,6 +440,8 @@ __forceinline void rcntUpdate_hScanline()
}
}
bool CoreCancelDamnit = false;
__forceinline void rcntUpdate_vSync()
{
s32 diff = (cpuRegs.cycle - vsyncCounter.sCycle);
@ -448,17 +450,24 @@ __forceinline void rcntUpdate_vSync()
if (vsyncCounter.Mode == MODE_VSYNC)
{
eeRecIsReset = false;
#ifndef PCSX2_SEH
if( CoreCancelDamnit || SysCoreThread::Get().HasPendingStateChangeRequest() )
{
longjmp( SetJmp_StateCheck, 1 );
}
#else
mtgsThread.RethrowException();
SysCoreThread::Get().StateCheckInThread();
#endif
if( eeRecIsReset )
{
eeRecIsReset = false;
cpuSetBranch();
// Hack! GCC is unwilling to let us throw exceptions here.
// (Ones in Exception::*, anyways.) Work around it by skipping
// it.
#ifdef _MSC_VER
#ifndef PCSX2_SEH
longjmp( SetJmp_RecExecute, SetJmp_Dispatcher );
#else
throw Exception::ForceDispatcherReg();
#endif
}

View File

@ -102,6 +102,7 @@ protected:
int m_CopyCommandTally;
int m_CopyDataTally;
volatile bool m_RingBufferIsBusy;
volatile bool m_PluginOpened;
// Counts the number of vsync frames queued in the MTGS ringbuffer. This is used to
// throttle the number of frames allowed to be rendered ahead of time for games that
@ -122,8 +123,6 @@ public:
mtgsThreadObject();
virtual ~mtgsThreadObject() throw();
void OnStart();
// Waits for the GS to empty out the entire ring buffer contents.
// Used primarily for plugin startup/shutdown.
void WaitGS();
@ -145,11 +144,15 @@ public:
protected:
void OpenPlugin();
void ClosePlugin();
void OnStart();
void OnResumeReady();
void OnSuspendInThread();
void OnPauseInThread() {}
void OnResumeInThread( bool IsSuspended );
void OnResumeReady();
void OnCleanupInThread();
// Saves MMX/XMM REGS, posts an event to the mtgsThread flag and releases a timeslice.
// For use in surrounding loops that wait on the mtgs.

View File

@ -115,7 +115,7 @@ void psxMemShutdown()
psxMemRLUT = NULL;
}
u8 iopMemRead8(u32 mem)
u8 __fastcall iopMemRead8(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -159,7 +159,7 @@ u8 iopMemRead8(u32 mem)
}
}
u16 iopMemRead16(u32 mem)
u16 __fastcall iopMemRead16(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -225,7 +225,7 @@ u16 iopMemRead16(u32 mem)
}
}
u32 iopMemRead32(u32 mem)
u32 __fastcall iopMemRead32(u32 mem)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -294,7 +294,7 @@ u32 iopMemRead32(u32 mem)
}
}
void iopMemWrite8(u32 mem, u8 value)
void __fastcall iopMemWrite8(u32 mem, u8 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -356,7 +356,7 @@ void iopMemWrite8(u32 mem, u8 value)
}
}
void iopMemWrite16(u32 mem, u16 value)
void __fastcall iopMemWrite16(u32 mem, u16 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;
@ -440,7 +440,7 @@ void iopMemWrite16(u32 mem, u16 value)
}
}
void iopMemWrite32(u32 mem, u32 value)
void __fastcall iopMemWrite32(u32 mem, u32 value)
{
mem &= 0x1fffffff;
u32 t = mem >> 16;

View File

@ -75,24 +75,24 @@ static __forceinline u8* iopPhysMem( u32 addr )
#define psxHu16(mem) (*(u16*)&psxH[(mem) & 0xffff])
#define psxHu32(mem) (*(u32*)&psxH[(mem) & 0xffff])
void psxMemAlloc();
void psxMemReset();
void psxMemShutdown();
extern void psxMemAlloc();
extern void psxMemReset();
extern void psxMemShutdown();
u8 iopMemRead8 (u32 mem);
u16 iopMemRead16(u32 mem);
u32 iopMemRead32(u32 mem);
void iopMemWrite8 (u32 mem, u8 value);
void iopMemWrite16(u32 mem, u16 value);
void iopMemWrite32(u32 mem, u32 value);
extern u8 __fastcall iopMemRead8 (u32 mem);
extern u16 __fastcall iopMemRead16(u32 mem);
extern u32 __fastcall iopMemRead32(u32 mem);
extern void __fastcall iopMemWrite8 (u32 mem, u8 value);
extern void __fastcall iopMemWrite16(u32 mem, u16 value);
extern void __fastcall iopMemWrite32(u32 mem, u32 value);
// x86reg and mmreg are always x86 regs
void psxRecMemRead8();
void psxRecMemRead16();
void psxRecMemRead32();
void psxRecMemWrite8();
void psxRecMemWrite16();
void psxRecMemWrite32();
extern void psxRecMemRead8();
extern void psxRecMemRead16();
extern void psxRecMemRead32();
extern void psxRecMemWrite8();
extern void psxRecMemWrite16();
extern void psxRecMemWrite32();
namespace IopMemory
{

View File

@ -44,14 +44,15 @@ void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * )
// get bad virtual address
uptr offset = (u8*)info->si_addr - psM;
DevCon.Status( "Protected memory cleanup. Offset 0x%x", offset );
if (offset>=Ps2MemSize::Base)
{
// Bad mojo! Completely invalid address.
// Instigate a crash or abort emulation or something.
assert( false );
wxTrap();
if( !IsDebugBuild )
raise( SIGKILL );
}
DevCon.Status( "Protected memory cleanup. Offset 0x%x", offset );
mmap_ClearCpuBlock( offset & ~m_pagemask );
}

View File

@ -96,8 +96,8 @@
<Add option="-fno-guess-branch-probability" />
<Add option="-fno-dse" />
<Add option="-fno-tree-dse" />
<Add option="-fno-strict-aliasing" />
<Add option="-pipe -msse -msse2" />
<Add option="-mpreferred-stack-boundary=2" />
<Add option="-m32" />
<Add option="-DWX_PRECOMP" />
<Add directory="$(SvnRootDir)/common/include/" />
@ -387,7 +387,6 @@
<Unit filename="../vtlb.h" />
<Unit filename="../x86/BaseblockEx.cpp" />
<Unit filename="../x86/BaseblockEx.h" />
<Unit filename="../x86/aR3000A.S" />
<Unit filename="../x86/aVUzerorec.S" />
<Unit filename="../x86/aVif.S" />
<Unit filename="../x86/iCOP0.cpp" />
@ -420,7 +419,6 @@
<Option compiler="gcc" use="0" buildCommand="gcc $options -S $includes -c $file -o $object" />
</Unit>
<Unit filename="../x86/ir5900tables.cpp" />
<Unit filename="../x86/ix86-32/aR5900-32.S" />
<Unit filename="../x86/ix86-32/iCore-32.cpp" />
<Unit filename="../x86/ix86-32/iR5900-32.cpp" />
<Unit filename="../x86/ix86-32/iR5900Arit.cpp" />

View File

@ -79,7 +79,6 @@ struct MTGS_BufferedData
static __aligned(32) MTGS_BufferedData RingBuffer;
extern bool renderswitch;
static volatile bool gsIsOpened = false;
#ifdef RINGBUF_DEBUG_STACK
@ -98,6 +97,7 @@ mtgsThreadObject::mtgsThreadObject() :
, m_CopyCommandTally( 0 )
, m_CopyDataTally( 0 )
, m_RingBufferIsBusy( false )
, m_PluginOpened( false )
, m_QueuedFrames( 0 )
, m_packet_size( 0 )
, m_packet_ringpos( 0 )
@ -111,7 +111,7 @@ mtgsThreadObject::mtgsThreadObject() :
void mtgsThreadObject::OnStart()
{
gsIsOpened = false;
m_PluginOpened = false;
m_RingPos = 0;
m_WritePos = 0;
@ -187,14 +187,6 @@ struct PacketTagType
u32 data[3];
};
static void _clean_close_gs( void* obj )
{
if( !gsIsOpened ) return;
gsIsOpened = false;
if( g_plugins != NULL )
g_plugins->m_info[PluginId_GS].CommonBindings.Close();
}
static void dummyIrqCallback()
{
// dummy, because MTGS doesn't need this mess!
@ -203,7 +195,7 @@ static void dummyIrqCallback()
void mtgsThreadObject::OpenPlugin()
{
if( gsIsOpened ) return;
if( m_PluginOpened ) return;
memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
GSsetBaseMem( RingBuffer.Regs );
@ -225,7 +217,7 @@ void mtgsThreadObject::OpenPlugin()
throw Exception::PluginOpenError( PluginId_GS );
}
gsIsOpened = true;
m_PluginOpened = true;
m_sem_OpenDone.Post();
GSCSRr = 0x551B4000; // 0x55190000
@ -238,7 +230,6 @@ void mtgsThreadObject::ExecuteTaskInThread()
PacketTagType prevCmd;
#endif
pthread_cleanup_push( _clean_close_gs, this );
while( true )
{
m_sem_event.WaitRaw(); // ... because this does a cancel test itself..
@ -409,18 +400,34 @@ void mtgsThreadObject::ExecuteTaskInThread()
}
m_RingBufferIsBusy = false;
}
pthread_cleanup_pop( true );
}
// Closes the GS plugin if it is currently open.  Replaces the removed static
// _clean_close_gs() helper; uses the m_PluginOpened member instead of the old
// file-scope gsIsOpened flag.  Idempotent: repeated calls are no-ops.
void mtgsThreadObject::ClosePlugin()
{
if( !m_PluginOpened ) return;
// Clear the flag before invoking Close so a re-entrant call bails out early.
m_PluginOpened = false;
// g_plugins may already be torn down during shutdown; guard against that.
if( g_plugins != NULL )
g_plugins->m_info[PluginId_GS].CommonBindings.Close();
}
// Thread-suspend hook: closes the GS plugin, then defers to the base class.
// NOTE(review): this span is stripped-prefix diff residue — the old call
// _clean_close_gs(NULL) (removed line) and its replacement ClosePlugin()
// (added line) both appear below; only ClosePlugin() exists post-commit.
void mtgsThreadObject::OnSuspendInThread()
{
_clean_close_gs( NULL );
ClosePlugin();
_parent::OnSuspendInThread();
}
// Thread-resume hook: reopens the GS plugin only when resuming from a full
// suspend (OnSuspendInThread closed it); a resume from pause leaves it open.
void mtgsThreadObject::OnResumeInThread( bool isSuspended )
{
if( isSuspended )
OpenPlugin();
_parent::OnResumeInThread( isSuspended );
}
// Thread-exit cleanup hook: replaces the removed pthread_cleanup_push/pop
// pairing in ExecuteTaskInThread.  ClosePlugin() is safe to call even if the
// plugin is already closed (it checks m_PluginOpened).
void mtgsThreadObject::OnCleanupInThread()
{
ClosePlugin();
_parent::OnCleanupInThread();
}
// Waits for the GS to empty out the entire ring buffer contents.
@ -783,7 +790,7 @@ void mtgsThreadObject::SendGameCRC( u32 crc )
void mtgsThreadObject::WaitForOpen()
{
if( gsIsOpened ) return;
if( m_PluginOpened ) return;
Resume();
// Two-phase timeout on MTGS opening, so that possible errors are handled

View File

@ -26,11 +26,6 @@ extern "C"
void so_call(coroutine_t coro);
void so_resume(void);
void so_exit(void);
void recRecompile( u32 startpc );
// aR3000A.S
void iopRecRecompile(u32 startpc);
}
#ifdef __LINUX__
@ -40,18 +35,6 @@ extern "C"
// aVUzerorec.S
void* SuperVUGetProgram(u32 startpc, int vuindex);
void SuperVUCleanupProgram(u32 startpc, int vuindex);
void svudispfn();
// aR3000A.S
void iopJITCompile();
void iopJITCompileInBlock();
void iopDispatcherReg();
// aR5900-32.S
void JITCompile();
void JITCompileInBlock();
void DispatcherReg();
void DispatcherEvent();
}
#endif

View File

@ -70,7 +70,8 @@ void psxShutdown() {
//psxCpu->Shutdown();
}
void psxException(u32 code, u32 bd) {
void __fastcall psxException(u32 code, u32 bd)
{
// PSXCPU_LOG("psxException %x: %x, %x", code, psxHu32(0x1070), psxHu32(0x1074));
//Console.WriteLn("!! psxException %x: %x, %x", code, psxHu32(0x1070), psxHu32(0x1074));
// Set the Cause

View File

@ -194,11 +194,11 @@ extern R3000Acpu *psxCpu;
extern R3000Acpu psxInt;
extern R3000Acpu psxRec;
void psxReset();
void psxShutdown();
void psxException(u32 code, u32 step);
extern void psxReset();
extern void psxShutdown();
extern void __fastcall psxException(u32 code, u32 step);
extern void psxBranchTest();
void psxMemReset();
extern void psxMemReset();
// Subsets
extern void (*psxBSC[64])();

View File

@ -61,8 +61,9 @@ extern void SysClearExecutionCache(); // clears recompiled execution caches!
extern u8 *SysMmapEx(uptr base, u32 size, uptr bounds, const char *caller="Unnamed");
extern void vSyncDebugStuff( uint frame );
//////////////////////////////////////////////////////////////////////////////////////////
//
// --------------------------------------------------------------------------------------
// Memory Protection (Used by VTLB, Recompilers, and Texture caches)
// --------------------------------------------------------------------------------------
#ifdef __LINUX__
# include <signal.h>
@ -87,6 +88,33 @@ extern void vSyncDebugStuff( uint frame );
# error PCSX2 - Unsupported operating system platform.
#endif
// --------------------------------------------------------------------------------------
// PCSX2_SEH - Defines existence of "built-in" Structured Exception Handling support.
// --------------------------------------------------------------------------------------
// This should be available on Windows, via Microsoft or Intel compilers (I'm pretty sure Intel
// supports native SEH model). GNUC in Windows, or any compiler in a non-windows platform, will
// need to use setjmp/longjmp instead to exit recompiled code.
//
#if defined(_WIN32) && !defined(__GNUC__)
# define PCSX2_SEH
#else
# include <setjmp.h>
// Platforms without SEH need to use SetJmp / LongJmp to deal with exiting the recompiled
// code execution pipelines in an efficient manner, since standard C++ exceptions cannot
// unwind across dynamically recompiled code.
enum
{
SetJmp_Dispatcher = 1,
SetJmp_Exit,
};
extern jmp_buf SetJmp_RecExecute;
extern jmp_buf SetJmp_StateCheck;
#endif
class pxMessageBoxEvent;
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -264,6 +264,9 @@ void SysThreadBase::OnCleanupInThread()
m_RunningLock.Unlock();
}
// Default (no-op) suspend/resume hooks for SysThreadBase; derived classes such
// as mtgsThreadObject override these to open/close plugins around state changes.
void SysThreadBase::OnSuspendInThread() {}
void SysThreadBase::OnResumeInThread( bool isSuspended ) {}
void SysThreadBase::StateCheckInThread( bool isCancelable )
{
switch( m_ExecMode )
@ -338,6 +341,13 @@ SysCoreThread::SysCoreThread() :
SysCoreThread::~SysCoreThread() throw()
{
SysCoreThread::Cancel();
}
extern bool CoreCancelDamnit;
// Cancels the core emulation thread.  Sets the global CoreCancelDamnit flag
// first so the non-SEH longjmp path in rcntUpdate_vSync can bail out of
// recompiled code, then performs the normal PersistentThread cancel.
// NOTE(review): the isBlocking parameter is not forwarded to _parent::Cancel()
// in this visible body — confirm the base default matches the intent.
void SysCoreThread::Cancel( bool isBlocking )
{
CoreCancelDamnit = true;
_parent::Cancel();
}
@ -345,6 +355,7 @@ void SysCoreThread::Start()
{
if( g_plugins == NULL ) return;
g_plugins->Init();
CoreCancelDamnit = false; // belongs in OnStart actually, but I'm tired :P
_parent::Start();
}

View File

@ -93,6 +93,12 @@ public:
return m_ExecMode > ExecMode_Closed;
}
// Returns true if the thread has been asked to close or pause, i.e. the
// recompiler should exit its execution loop and service the state change.
// m_ExecMode is copied to a local first so both comparisons see one
// consistent snapshot (presumably it can change from another thread — verify).
bool HasPendingStateChangeRequest()
{
ExecutionMode mode = m_ExecMode;
return (mode == ExecMode_Closing) || (mode == ExecMode_Pausing);
}
bool IsClosed() const { return !IsOpen(); }
ExecutionMode GetExecutionMode() const { return m_ExecMode; }
@ -164,6 +170,7 @@ public:
virtual void ApplySettings( const Pcsx2Config& src );
virtual void OnResumeReady();
virtual void Reset();
virtual void Cancel( bool isBlocking=true );
bool HasValidState()
{

View File

@ -50,7 +50,7 @@ void VU0MI_XGKICK() {
void VU0MI_XTOP() {
}
void vu0ExecMicro(u32 addr) {
void __fastcall vu0ExecMicro(u32 addr) {
VUM_LOG("vu0ExecMicro %x", addr);
if(VU0.VI[REG_VPU_STAT].UL & 0x1) {

View File

@ -46,7 +46,7 @@ void vu1ResetRegs()
static int count;
void vu1ExecMicro(u32 addr)
void __fastcall vu1ExecMicro(u32 addr)
{
while(VU0.VI[REG_VPU_STAT].UL & 0x100)
{

View File

@ -119,14 +119,14 @@ extern void (*VU1regs_UPPER_FD_11_TABLE[32])(_VURegsNum *VUregsn);
// VU0
extern void vu0ResetRegs();
extern void vu0ExecMicro(u32 addr);
extern void __fastcall vu0ExecMicro(u32 addr);
extern void vu0Exec(VURegs* VU);
extern void vu0Finish();
extern void recResetVU0( void );
// VU1
extern void vu1ResetRegs();
extern void vu1ExecMicro(u32 addr);
extern void __fastcall vu1ExecMicro(u32 addr);
extern void vu1Exec(VURegs* VU);
void VU0_UPPER_FD_00();

View File

@ -74,6 +74,10 @@ static __threadlocal bool _reentrant_lock = false;
// via messages.
void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func, const wxChar *cond, const wxChar *msg )
{
// Used to allow the user to suppress future assertions during this application's session.
static bool disableAsserts = false;
if( disableAsserts ) return;
if( _reentrant_lock )
{
// Re-entrant assertions are bad mojo -- trap immediately.
@ -82,9 +86,6 @@ void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func
_reentrant_lock = true;
// Used to allow the user to suppress future assertions during this application's session.
static bool disableAsserts = false;
wxString dbgmsg;
dbgmsg.reserve( 2048 );

View File

@ -312,9 +312,7 @@ int m_pendingFlushes = 0;
// and this one will magically follow suite. :)
void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
{
//#ifdef PCSX2_SEH
pthread_testcancel();
//#endif
ScopedLock lock( m_QueueLock );
@ -355,7 +353,7 @@ void ConsoleLogFrame::Write( ConsoleColors color, const wxString& text )
++m_WaitingThreadsForFlush;
lock.Unlock();
if( !m_sem_QueueFlushed.WaitRaw( wxTimeSpan( 0,0,0,500 ) ) )
if( !m_sem_QueueFlushed.Wait( wxTimeSpan( 0,0,0,500 ) ) )
{
// Necessary since the main thread could grab the lock and process before
// the above function actually returns (gotta love threading!)

View File

@ -487,10 +487,6 @@
RelativePath="..\..\x86\aMicroVU.S"
>
</File>
<File
RelativePath="..\..\x86\aR3000A.S"
>
</File>
<File
RelativePath="..\..\x86\ix86-32\aR5900-32.S"
>
@ -896,14 +892,6 @@
RelativePath="..\..\x86\iCOP2.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.h"
>
</File>
<File
RelativePath="..\..\x86\iFPU.cpp"
>
@ -995,10 +983,6 @@
<Filter
Name="ix86-32"
>
<File
RelativePath="..\..\x86\ix86-32\iCore-32.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86-32\iR5900-32.cpp"
>
@ -1755,6 +1739,22 @@
>
</File>
</Filter>
<Filter
Name="iCore"
>
<File
RelativePath="..\..\x86\ix86-32\iCore-32.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.cpp"
>
</File>
<File
RelativePath="..\..\x86\iCore.h"
>
</File>
</Filter>
</Filter>
<Filter
Name="Windows"

View File

@ -46,14 +46,22 @@ struct BASEBLOCKEX
class BaseBlocks
{
private:
protected:
typedef std::multimap<u32, uptr>::iterator linkiter_t;
// switch to a hash map later?
std::multimap<u32, uptr> links;
typedef std::multimap<u32, uptr>::iterator linkiter_t;
uptr recompiler;
std::vector<BASEBLOCKEX> blocks;
public:
// Default constructor: no dispatcher target yet (recompiler == NULL; assign
// one later via SetJITCompile).  Pre-reserves 0x4000 block-info entries to
// avoid vector reallocation churn while the recompiler fills the cache.
BaseBlocks() :
recompiler( NULL )
, blocks(0)
{
blocks.reserve(0x4000);
}
BaseBlocks(uptr recompiler_) :
recompiler(recompiler_),
blocks(0)
@ -61,6 +69,11 @@ public:
blocks.reserve(0x4000);
}
// Late-binds the JIT-compile dispatcher entry point.  Added because the
// dispatchers are now emitter-generated at runtime (see _DynGen_JITCompile)
// and thus cannot be passed to the constructor of a static BaseBlocks object.
void SetJITCompile( void (*recompiler_)() )
{
recompiler = (uptr)recompiler_;
}
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
int LastIndex (u32 startpc) const;
BASEBLOCKEX* GetByX86(uptr ip);

View File

@ -1,51 +0,0 @@
// iR3000a.c assembly routines
.intel_syntax noprefix
//////////////////////////////////////////////////////////////////////////
// Note that iR3000A.S and iR5900.S asm code is now identical. Only some
// function names and the following two defines should ever differ:
#define REGINFO psxRegs
#define RECLUT psxRecLUT
#define PCOFFSET 0x208 // this must always match what Pcsx2 displays at startup
//////////////////////////////////////////////////////////////////////////
// Preprocessor Mess!
.extern REGINFO
.extern RECLUT
.extern iopRecRecompile
//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
.global iopJITCompile
iopJITCompile:
mov esi, dword ptr [REGINFO + PCOFFSET]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [RECLUT+esi*4]
jmp dword ptr [ecx+ebx]
.global iopJITCompileInBlock
iopJITCompileInBlock:
jmp iopJITCompile
//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.
.globl iopDispatcherReg
iopDispatcherReg:
mov eax, dword ptr [REGINFO + PCOFFSET]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [RECLUT+eax*4]
jmp dword ptr [ecx+ebx]

View File

@ -21,7 +21,6 @@ SuperVUExecuteProgram:
add esp, 4
mov dword ptr [s_callstack], eax
call SuperVUGetProgram
mov s_vu1ebp, ebp
mov s_vu1esi, esi
mov s_vuedi, edi
mov s_vuebx, ebx
@ -38,7 +37,6 @@ SuperVUExecuteProgram:
SuperVUEndProgram:
// restore cpu state
ldmxcsr g_sseMXCSR
mov ebp, s_vu1ebp
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx

View File

@ -28,6 +28,7 @@
#include "iCOP0.h"
namespace Interp = R5900::Interpreter::OpcodeImpl::COP0;
using namespace x86Emitter;
namespace R5900 {
namespace Dynarec {
@ -163,12 +164,14 @@ void recMFC0( void )
break;
case 1:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32MtoR(EAX, (uptr)&cpuRegs.PERF.n.pcr0);
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV(eax, &cpuRegs.PERF.n.pcr0);
break;
case 3:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32MtoR(EAX, (uptr)&cpuRegs.PERF.n.pcr1);
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV(eax, &cpuRegs.PERF.n.pcr1);
break;
}
_deleteEEreg(_Rt_, 0);
@ -240,8 +243,8 @@ void recMTC0()
{
case 12:
iFlushCall(FLUSH_NODESTROY);
//_flushCachedRegs(); //NOTE: necessary?
_callFunctionArg1((uptr)WriteCP0Status, MEM_CONSTTAG, g_cpuConstRegs[_Rt_].UL[0]);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status );
break;
case 9:
@ -254,9 +257,10 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
CALLFunc( (uptr)COP0_UpdatePCCR );
MOV32ItoM((uptr)&cpuRegs.PERF.n.pccr, g_cpuConstRegs[_Rt_].UL[0]);
CALLFunc( (uptr)COP0_DiagnosticPCCR );
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR );
break;
case 1:
@ -288,8 +292,8 @@ void recMTC0()
{
case 12:
iFlushCall(FLUSH_NODESTROY);
//_flushCachedRegs(); //NOTE: necessary?
_callFunctionArg1((uptr)WriteCP0Status, MEM_GPRTAG|_Rt_, 0);
_eeMoveGPRtoR(ECX, _Rt_);
xCALL( WriteCP0Status );
break;
case 9:
@ -302,9 +306,10 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
CALLFunc( (uptr)COP0_UpdatePCCR );
iFlushCall(FLUSH_NODESTROY);
xCALL( COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
CALLFunc( (uptr)COP0_DiagnosticPCCR );
xCALL( COP0_DiagnosticPCCR );
break;
case 1:

View File

@ -136,11 +136,13 @@ static void recCTC2(s32 info)
MOV16ItoM((uptr)&VU0.VI[REG_FBRST].UL,g_cpuConstRegs[_Rt_].UL[0]&0x0c0c);
break;
case REG_CMSAR1: // REG_CMSAR1
iFlushCall(FLUSH_NOCONST);// since CALLFunc
iFlushCall(FLUSH_NOCONST);
assert( _checkX86reg(X86TYPE_VI, REG_VPU_STAT, 0) < 0 &&
_checkX86reg(X86TYPE_VI, REG_TPC, 0) < 0 );
// Execute VU1 Micro SubRoutine
_callFunctionArg1((uptr)vu1ExecMicro, MEM_CONSTTAG, g_cpuConstRegs[_Rt_].UL[0]&0xffff);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0]&0xffff );
xCALL( vu1ExecMicro );
break;
default:
{
@ -191,10 +193,10 @@ static void recCTC2(s32 info)
AND32ItoR(EAX,0x0C0C);
MOV16RtoM((uptr)&VU0.VI[REG_FBRST].UL,EAX);
break;
case REG_CMSAR1: // REG_CMSAR1
case REG_CMSAR1: // REG_CMSAR1 (Execute VU1micro Subroutine)
iFlushCall(FLUSH_NOCONST);
_eeMoveGPRtoR(EAX, _Rt_);
_callFunctionArg1((uptr)vu1ExecMicro, MEM_X86TAG|EAX, 0); // Execute VU1 Micro SubRoutine
_eeMoveGPRtoR(ECX, _Rt_);
xCALL( vu1ExecMicro );
break;
default:
_eeMoveGPRtoM((uptr)&VU0.VI[_Fs_].UL,_Rt_);

View File

@ -187,8 +187,6 @@ u8 _hasFreeXMMreg();
void _freeXMMregs();
int _getNumXMMwrite();
// uses MEM_MMXTAG/MEM_XMMTAG to differentiate between the regs
void _recPushReg(int mmreg);
void _signExtendSFtoM(u32 mem);
// returns new index of reg, lower 32 bits already in mmx
@ -196,41 +194,8 @@ void _signExtendSFtoM(u32 mem);
// a negative shift is for sign extension
int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy); // returns true if reg destroyed
// Defines for passing register info
// only valid during writes. If write128, then upper 64bits are in an mmxreg
// (mmreg&0xf). Constant is used from gprreg ((mmreg>>16)&0x1f)
enum memtag
{
MEM_EECONSTTAG = 0x0100, // argument is a GPR and comes from g_cpuConstRegs
MEM_PSXCONSTTAG = 0x0200,
MEM_MEMORYTAG = 0x0400,
MEM_MMXTAG = 0x0800, // mmreg is mmxreg
MEM_XMMTAG = 0x8000, // mmreg is xmmreg
MEM_X86TAG = 0x4000, // ignored most of the time
MEM_GPRTAG = 0x2000, // argument is a GPR reg
MEM_CONSTTAG = 0x1000 // argument is a const
};
template<memtag tag> static __forceinline bool IS_REG(s32 reg)
{
return ((reg >= 0) && (reg & tag));
}
template<memtag tag> static __forceinline bool IS_REG(u32 reg)
{
return !!(reg & tag);
}
#define IS_EECONSTREG(reg) IS_REG<MEM_EECONSTTAG>(reg)
#define IS_PSXCONSTREG(reg) IS_REG<MEM_PSXCONSTTAG>(reg)
#define IS_MMXREG(reg) IS_REG<MEM_MMXTAG>(reg)
#define IS_XMMREG(reg) IS_REG<MEM_XMMTAG>(reg)
#define IS_X86REG(reg) IS_REG<MEM_X86TAG>(reg)
#define IS_GPRREG(reg) IS_REG<MEM_GPRTAG>(reg)
#define IS_CONSTREG(reg) IS_REG<MEM_CONSTTAG>(reg)
#define IS_MEMORYREG(reg) IS_REG<MEM_MEMORYTAG>(reg)
static const int MEM_MMXTAG = 0x0800; // mmreg is mmxreg
static const int MEM_XMMTAG = 0x8000; // mmreg is xmmreg
//////////////////////
// Instruction Info //
@ -425,12 +390,6 @@ extern u16 x86FpuState;
//////////////////////////////////////////////////////////////////////////
// Utility Functions -- that should probably be part of the Emitter.
// see MEM_X defines for argX format
extern void _callPushArg(u32 arg, uptr argmem); /// X86ARG is ignored for 32bit recs
extern void _callFunctionArg1(uptr fn, u32 arg1, uptr arg1mem);
extern void _callFunctionArg2(uptr fn, u32 arg1, u32 arg2, uptr arg1mem, uptr arg2mem);
extern void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr arg2mem, uptr arg3mem);
// Moves 128 bits of data using EAX/EDX (used by iCOP2 only currently)
extern void _recMove128MtoM(u32 to, u32 from);

View File

@ -58,52 +58,11 @@ uptr psxhwLUT[0x10000];
// R3000A statics
int psxreclog = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static __declspec(naked) void iopJITCompile()
{
__asm {
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
jmp dword ptr [ecx+ebx]
}
}
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
static __declspec(naked) void iopDispatcherReg()
{
__asm {
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BaseBlocks recBlocks((uptr)iopJITCompile);
static BaseBlocks recBlocks;
static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc
int psxbranch; // set for branch
@ -140,6 +99,277 @@ static u32 psxdump = 0;
(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \
psxRecClearMem(mem) : 4)
// =====================================================================================================
// Dynamically Compiled Dispatchers - R3000A style
// =====================================================================================================
static void __fastcall iopRecRecompile( const u32 startpc );
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned iopRecDispatchers[0x1000];
typedef void DynGenFunc();
static DynGenFunc* iopDispatcherEvent = NULL;
static DynGenFunc* iopDispatcherReg = NULL;
static DynGenFunc* iopJITCompile = NULL;
static DynGenFunc* iopJITCompileInBlock = NULL;
static DynGenFunc* iopEnterRecompiledCode = NULL;
static DynGenFunc* iopExitRecompiledCode = NULL;
// Event-test callback invoked from the generated dispatcher code.  The
// asserts verify that no MMX/XMM register state is still flagged as saved
// on entry and exit — i.e. the recompiled code restored everything before
// calling back into C.
static void recEventTest()
{
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
_cpuBranchTest_Shared();
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
}
// __fastcall handler called from generated code when the dev-build stackframe
// sanity check (see _DynGen_StackFrameCheck) finds ESP or EBP diverging from
// the values saved at recompiler entry.
//
// parameters:
//   espORebp - 0 for ESP, or 1 for EBP.
//   regval   - current value of the register at the time the fault was detected (predates the
//     stackframe setup code in this function)
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	pxFailDev( wxsFormat( L"(R3000A Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
	// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
	// tinue to run with some degree of stability.
}
// Emits a dev-build-only sanity check comparing EBP/ESP against the values
// recorded at recompiler entry (s_store_ebp / s_store_esp).  On mismatch it
// calls StackFrameCheckFailed (__fastcall: ecx = which register, edx = current
// value) and then reloads the register from the saved copy as a best-effort
// recovery.  Emits nothing in non-dev builds.
static void _DynGen_StackFrameCheck()
{
	if( !IsDevBuild ) return;

	// --------- EBP Here -----------

	xCMP( ebp, &s_store_ebp );
	xForwardJE8 skipassert_ebp;
	xMOV( ecx, 1 );						// 1 specifies EBP
	xMOV( edx, ebp );
	xCALL( StackFrameCheckFailed );
	xMOV( ebp, &s_store_ebp );			// half-hearted frame recovery attempt!
	skipassert_ebp.SetTarget();

	// --------- ESP There -----------

	xCMP( esp, &s_store_esp );
	xForwardJE8 skipassert_esp;
	xXOR( ecx, ecx );					// 0 specifies ESP
	xMOV( edx, esp );
	xCALL( StackFrameCheckFailed );
	xMOV( esp, &s_store_esp );			// half-hearted frame recovery attempt!
	skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
//
// Must be generated *after* iopDispatcherReg (asserted below) since both use
// the same dispatch tail.  Fix: the assert message previously misspelled
// "JITCompile" as "JITComple".
static DynGenFunc* _DynGen_JITCompile()
{
	pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITCompile. Thanks." );

	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	// iopRecRecompile is __fastcall: the pc is passed in ECX.
	xMOV( ecx, &psxRegs.pc );
	xCALL( iopRecRecompile );

	// Dispatch to the freshly-compiled block: psxRecLUT[pc>>16] + pc.
	xMOV( eax, &psxRegs.pc );
	xMOV( ebx, eax );
	xSHR( eax, 16 );
	xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Generates the in-block variant of the JIT stub: a plain jump to the
// iopJITCompile dispatcher (used for blocks cleared while executing).
static DynGenFunc* _DynGen_JITCompileInBlock()
{
	u8* retval = xGetPtr();
	xJMP( iopJITCompile );
	return (DynGenFunc*)retval;
}
// called when jumping to variable pc address
// Generates the register dispatcher: looks up psxRecLUT[pc>>16] and jumps to
// the block entry at (LUT + pc).  Dev builds prepend the stackframe check.
static DynGenFunc* _DynGen_DispatcherReg()
{
	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	xMOV( eax, &psxRegs.pc );
	xMOV( ebx, eax );			// ebx keeps the full pc as the LUT byte offset
	xSHR( eax, 16 );
	xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub!
// --------------------------------------------------------------------------------------
// In Release Builds this literally generates the following code:
// push edi
// push esi
// push ebx
// jmp DispatcherReg
// pop ebx
// pop esi
// pop edi
//
// See notes on why this works in both GCC (aligned stack!) and other compilers (not-so-
// aligned stack!). In debug/dev builds the code gen is more complicated, as it constructs
// ebp stackframe mess, which allows for a complete backtrace from debug breakpoints (yay).
//
// Also, if you set PCSX2_IOP_FORCED_ALIGN_STACK to 1, the codegen for MSVC becomes slightly
// more complicated since it has to perform a full stack alignment on entry.
//
#if defined(__GNUG__) || defined(__DARWIN__)
# define PCSX2_ASSUME_ALIGNED_STACK 1
#else
# define PCSX2_ASSUME_ALIGNED_STACK 0
#endif
// Set to 0 for a speedup in release builds.
// [doesn't apply to GCC/Mac, which must always align]
#define PCSX2_IOP_FORCED_ALIGN_STACK 0 //1
// For overriding stackframe generation options in Debug builds (possibly useful for troubleshooting)
// Typically this value should be the same as IsDevBuild.
static const bool GenerateStackFrame = IsDevBuild;
// Generates the entry thunk that transitions from C++ into recompiled IOP
// code, and (as a side effect) also generates iopExitRecompiledCode, the
// cleanup/return path targeted by the recompiler's exit jumps.
//
// Two codegen shapes exist:
//  * allocatedStack: full EBP frame + 16-byte ESP alignment, with an optional
//    simulated CALL frame (dev builds) so debuggers can unwind through the
//    generated code.
//  * plain push/pop of the callee-saved regs (release MSVC-style builds,
//    where stack alignment is not enforced).
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
	u8* retval = xGetPtr();

	bool allocatedStack = GenerateStackFrame || PCSX2_IOP_FORCED_ALIGN_STACK;

	// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
	// allocating any room on the stack for it (which is important since the IOP's entry
	// code gets invoked quite a lot).

	if( allocatedStack )
	{
		// Standard aligned-frame prologue: save the caller's frame, align ESP
		// to 16 bytes, and spill the callee-saved regs into the new frame.
		xPUSH( ebp );
		xMOV( ebp, esp );
		xAND( esp, -0x10 );

		xSUB( esp, 0x20 );

		xMOV( ptr[ebp-12], edi );
		xMOV( ptr[ebp-8], esi );
		xMOV( ptr[ebp-4], ebx );
	}
	else
	{
		// GCC Compiler:
		// The frame pointer coming in from the EE's event test can be safely assumed to be
		// aligned, since GCC always aligns stackframes. While handy in x86-64, where CALL + PUSH EBP
		// results in a neatly realigned stack on entry to every function, unfortunately in x86-32
		// this is usually worthless because CALL+PUSH leaves us 8 byte aligned instead (fail). So
		// we have to do the usual set of stackframe alignments and simulated callstack mess
		// *regardless*.

		// MSVC/Intel compilers:
		// The PCSX2_IOP_FORCED_ALIGN_STACK setting is 0, so we don't care. Just push regs like
		// the good old days! (stack alignment will be indeterminate)

		xPUSH( edi );
		xPUSH( esi );
		xPUSH( ebx );

		allocatedStack = false;
	}

	uptr* imm = NULL;
	if( allocatedStack )
	{
		if( GenerateStackFrame )
		{
			// Simulate a CALL function by pushing the call address and EBP onto the stack.
			// This retains proper stacktrace and stack unwinding (handy in devbuilds!)

			// 0xffeeff is a placeholder "return address"; 'imm' records where
			// it lives so it can be patched (below) to point at the cleanup code.
			xMOV( ptr32[esp+0x0c], 0xffeeff );
			imm = (uptr*)(xGetPtr()-4);

			// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
			xMOV( ptr32[esp+0x08], ebp );
			xLEA( ebp, ptr32[esp+0x08] );
		}
	}

	if( IsDevBuild )
	{
		// Record the entry-time ESP/EBP for _DynGen_StackFrameCheck.
		xMOV( &s_store_esp, esp );
		xMOV( &s_store_ebp, ebp );
	}

	xJMP( iopDispatcherReg );
	if( imm != NULL )
		*imm = (uptr)xGetPtr();		// patch the fake return address to the cleanup code

	// ----------------------
	// ----> Cleanup! ---->

	iopExitRecompiledCode = (DynGenFunc*)xGetPtr();

	if( allocatedStack )
	{
		// pop the nested "simulated call" stackframe, if needed:
		if( GenerateStackFrame ) xLEAVE();
		xMOV( edi, ptr[ebp-12] );
		xMOV( esi, ptr[ebp-8] );
		xMOV( ebx, ptr[ebp-4] );
		xLEAVE();
	}
	else
	{
		xPOP( ebx );
		xPOP( esi );
		xPOP( edi );
	}

	xRET();

	return (DynGenFunc*)retval;
}
// Generates all IOP dispatcher stubs into the page-aligned iopRecDispatchers
// buffer and then write-protects the page for the life of the process.
static void _DynGen_Dispatchers()
{
	// In case init gets called multiple times:
	HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadWrite, false );

	// clear the buffer to 0xcc (easier debugging).
	memset_8<0xcc,0x1000>( iopRecDispatchers );

	xSetPtr( iopRecDispatchers );

	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
	// most and stand to benefit from strong alignment and direct referencing.
	// Note: iopDispatcherEvent is just the CALL below; it falls straight
	// through into iopDispatcherReg, which is generated immediately after.
	iopDispatcherEvent = (DynGenFunc*)xGetPtr();
	xCALL( recEventTest );
	iopDispatcherReg = _DynGen_DispatcherReg();

	iopJITCompile = _DynGen_JITCompile();
	iopJITCompileInBlock = _DynGen_JITCompileInBlock();
	iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();

	HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadOnly, true );

	// Cleared blocks get re-routed through the freshly generated JIT stub.
	recBlocks.SetJITCompile( iopJITCompile );
}
////////////////////////////////////////////////////
using namespace R3000A;
#include "Utilities/AsciiFile.h"
@ -350,7 +580,10 @@ void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr)
void _psxFlushCall(int flushtype)
{
_freeX86regs();
// x86-32 ABI : These registers are not preserved across calls:
_freeX86reg( EAX );
_freeX86reg( ECX );
_freeX86reg( EDX );
if( flushtype & FLUSH_CACHED_REGS )
_psxFlushConstRegs();
@ -436,11 +669,6 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
_psxFlushCall(FLUSH_NODESTROY);
CALLFunc((uptr)zeroEx);
}
// Bios Call: Force the IOP to do a Branch Test ASAP.
// Important! This helps prevent game freeze-ups during boot-up and stage loads.
// Note: Fixes to cdvd have removed the need for this code.
//MOV32MtoR( EAX, (uptr)&psxRegs.cycle );
//MOV32RtoM( (uptr)&g_psxNextBranchCycle, EAX );
}
return;
}
@ -551,13 +779,14 @@ static void recAlloc()
throw Exception::OutOfMemory( "R3000a Init > Failed to allocate memory for pInstCache." );
ProfilerRegisterSource( "IOPRec", recMem, RECMEM_SIZE );
_DynGen_Dispatchers();
}
void recResetIOP()
{
// calling recResetIOP without first calling recInit is bad mojo.
jASSUME( recMem != NULL );
jASSUME( m_recBlockAlloc != NULL );
pxAssert( recMem != NULL );
pxAssert( m_recBlockAlloc != NULL );
DevCon.Status( "iR3000A Resetting recompiler memory and structures" );
@ -630,7 +859,7 @@ static void recExecute()
//for (;;) R3000AExecute();
}
static __forceinline s32 recExecuteBlock( s32 eeCycles )
static __noinline s32 recExecuteBlock( s32 eeCycles )
{
psxBreak = 0;
psxCycleEE = eeCycles;
@ -639,38 +868,23 @@ static __forceinline s32 recExecuteBlock( s32 eeCycles )
// The IOP does not use mmx/xmm registers, so we don't modify the status
// of the g_EEFreezeRegs here.
#ifdef _MSC_VER
__asm
{
push ebx
push esi
push edi
// [TODO] recExecuteBlock could be replaced by a direct call to the iopEnterRecompiledCode()
// (by assigning its address to the psxRec structure). But for that to happen, we need
// to move psxBreak/psxCycleEE update code to emitted assembly code. >_< --air
call iopDispatcherReg
// Likely Disasm, as borrowed from MSVC:
pop edi
pop esi
pop ebx
}
#else
__asm__ __volatile__
(
// We should be able to rely on GAS syntax (the register clobber list) as a
// replacement for manual push/pop of unpreserved registers.
// Entry:
// mov eax,dword ptr [esp+4]
// mov dword ptr [psxBreak (0E88DCCh)],0
// mov dword ptr [psxCycleEE (832A84h)],eax
".intel_syntax noprefix\n"
//"push ebx\n"
//"push esi\n"
//"push edi\n"
// Exit:
// mov ecx,dword ptr [psxBreak (0E88DCCh)]
// mov edx,dword ptr [psxCycleEE (832A84h)]
// lea eax,[edx+ecx]
"call iopDispatcherReg\n"
//"pop edi\n"
//"pop esi\n"
//"pop ebx\n"
".att_syntax\n"
: : : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory" );
#endif
iopEnterRecompiledCode();
return psxBreak + psxCycleEE;
}
@ -690,7 +904,7 @@ static __forceinline u32 psxRecClearMem(u32 pc)
u32 lowerextent = pc, upperextent = pc + 4;
int blockidx = recBlocks.Index(pc);
jASSUME(blockidx != -1);
pxAssert(blockidx != -1);
while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1]) {
if (pexblock->startpc + pexblock->size * 4 <= lowerextent)
@ -709,14 +923,14 @@ static __forceinline u32 psxRecClearMem(u32 pc)
recBlocks.Remove(blockidx);
}
#ifdef PCSX2_DEVBUILD
blockidx=0;
while(BASEBLOCKEX* pexblock = recBlocks[blockidx++])
{
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) {
Console.Error("Impossible block clearing failure");
jASSUME(0);
DevCon.Error("Impossible block clearing failure");
pxFailDev( "Impossible block clearing failure" );
}
}
#endif
iopClearRecLUT(PSX_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
@ -799,12 +1013,8 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
MOV32RtoM((uptr)&psxRegs.cycle, ECX); // update cycles
MOV32RtoM((uptr)&psxCycleEE, EAX);
j8Ptr[2] = JG8( 0 ); // jump if psxCycleEE > 0
RET(); // returns control to the EE
// Continue onward with branching here:
x86SetJ8( j8Ptr[2] );
// jump if psxCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
xJLE( iopExitRecompiledCode );
// check if an event is pending
SUB32MtoR(ECX, (uptr)&g_psxNextBranchCycle);
@ -846,7 +1056,9 @@ void rpsxSYSCALL()
MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
_callFunctionArg2((uptr)psxException, MEM_CONSTTAG, MEM_CONSTTAG, 0x20, psxbranch==1);
xMOV( ecx, 0x20 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
@ -867,7 +1079,9 @@ void rpsxBREAK()
MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
_callFunctionArg2((uptr)psxBREAK, MEM_CONSTTAG, MEM_CONSTTAG, 0x24, psxbranch==1);
xMOV( ecx, 0x24 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
@ -935,7 +1149,7 @@ static void printfn()
#endif
}
void iopRecRecompile(u32 startpc)
static void __fastcall iopRecRecompile( const u32 startpc )
{
u32 i;
u32 branchTo;

View File

@ -594,321 +594,23 @@ void rpsxDIVU_(int info) { rpsxDIVsuper(info, 0); }
PSXRECOMPILE_CONSTCODE3_PENALTY(DIVU, 1, psxInstCycles_Div);
//// LoadStores
#ifdef PCSX2_VIRTUAL_MEM
// VM load store functions (fastest)
//#define REC_SLOWREAD
//#define REC_SLOWWRITE
// Register-preparation stub for the VM (virtual memory) build: the fast
// loadstore path needs no x86 register pre-allocation for the base GPR,
// so this always reports 0 (no register assigned).
int _psxPrepareReg(int gprreg)
{
	(void)gprreg;	// unused in the VM build
	return 0;
}
static u32 s_nAddMemOffset = 0;
// Patch target for the hardware-address branch emitted by rpsxSetMemLocation:
// binds j8Ptr[0] here, undoes the <<3 applied to the address in ECX, and
// applies the current memory offset (if any) before the handler call.
static __forceinline void SET_HWLOC_R3000A() {
	x86SetJ8(j8Ptr[0]);
	SHR32ItoR(ECX, 3);
	if( s_nAddMemOffset ) ADD32ItoR(ECX, s_nAddMemOffset);
}
// Emits code that computes the effective address (GPR[regs] + _Imm_) into ECX
// and masks it down to the 2MB psx RAM range.  The SHL 3 / JS sequence moves
// address bit 28 (0x10000000, the hardware range) into the sign flag; the
// taken branch (j8Ptr[0]) is later bound by SET_HWLOC_R3000A.
// Always returns 1, indicating the hardware branch was emitted ("dohw").
int rpsxSetMemLocation(int regs, int mmreg)
{
	s_nAddMemOffset = 0;
	MOV32MtoR( ECX, (int)&psxRegs.GPR.r[ regs ] );
	if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ );

	SHL32ItoR(ECX, 3);
	j8Ptr[0] = JS8(0);		// sign set => bit 28 set => hardware address
	SHR32ItoR(ECX, 3);
	AND32ItoR(ECX, 0x1fffff); // 2Mb
	return 1;
}
// Emits native code for an IOP load of the given width:
//   Rt = [Rs + Imm], sign- or zero-extended per 'sign' (8/16-bit widths).
//
// Two paths:
//  * Rs known-constant: the address is resolved at compile time through the
//    psxRecMemConstRead* helpers.
//  * Dynamic: the address is computed into ECX; RAM reads go straight through
//    the PS2MEM_PSX_ mirror, while hardware addresses (the sign-bit branch
//    from rpsxSetMemLocation) divert to the psxRecMemRead* handlers.
void recLoad32(u32 bit, u32 sign)
{
	int mmreg = -1;

#ifdef REC_SLOWREAD
	_psxFlushConstReg(_Rs_);
#else
	if( PSX_IS_CONST1( _Rs_ ) ) {
		// do const processing
		int ineax = 0;

		_psxOnWriteReg(_Rt_);
		mmreg = EAX;
		switch(bit) {
			case 8: ineax = psxRecMemConstRead8(mmreg, g_psxConstRegs[_Rs_]+_Imm_, sign); break;
			case 16:
				assert( (g_psxConstRegs[_Rs_]+_Imm_) % 2 == 0 );
				ineax = psxRecMemConstRead16(mmreg, g_psxConstRegs[_Rs_]+_Imm_, sign);
				break;
			case 32:
				assert( (g_psxConstRegs[_Rs_]+_Imm_) % 4 == 0 );
				ineax = psxRecMemConstRead32(mmreg, g_psxConstRegs[_Rs_]+_Imm_);
				break;
		}

		// Writeback is skipped for loads targeting r0 (_Rt_ == 0).
		if( _Rt_ ) MOV32RtoM( (int)&psxRegs.GPR.r[ _Rt_ ], EAX );
	}
	else
#endif
	{
		int dohw;
		int mmregs = _psxPrepareReg(_Rs_);

		_psxOnWriteReg(_Rt_);
		_psxDeleteReg(_Rt_, 0);

		dohw = rpsxSetMemLocation(_Rs_, mmregs);

		// Direct RAM read: [PS2MEM_PSX_ + masked address], extended into EAX.
		switch(bit) {
			case 8:
				if( sign ) MOVSX32Rm8toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				else MOVZX32Rm8toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
			case 16:
				if( sign ) MOVSX32Rm16toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				else MOVZX32Rm16toROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
			case 32:
				MOV32RmtoROffset(EAX, ECX, PS2MEM_PSX_+s_nAddMemOffset);
				break;
		}

		if( dohw ) {
			// RAM path taken above: skip over the hardware-read path.
			j8Ptr[1] = JMP8(0);

			SET_HWLOC_R3000A();

			// Hardware/IO read via indirect handler; result lands in EAX.
			switch(bit) {
				case 8:
					CALLFunc( (int)psxRecMemRead8 );
					if( sign ) MOVSX32R8toR(EAX, EAX);
					else MOVZX32R8toR(EAX, EAX);
					break;
				case 16:
					CALLFunc( (int)psxRecMemRead16 );
					if( sign ) MOVSX32R16toR(EAX, EAX);
					else MOVZX32R16toR(EAX, EAX);
					break;
				case 32:
					CALLFunc( (int)psxRecMemRead32 );
					break;
			}

			x86SetJ8(j8Ptr[1]);
		}

		if( _Rt_ )
			MOV32RtoM( (int)&psxRegs.GPR.r[ _Rt_ ], EAX );
	}
}
// Opcode-level load dispatchers: each forwards to the shared recLoad32
// emitter with the appropriate operand width and sign-extension flag.
void rpsxLB()
{
	recLoad32(8, 1);	// load byte, sign-extended
}

void rpsxLBU()
{
	recLoad32(8, 0);	// load byte, zero-extended
}

void rpsxLH()
{
	recLoad32(16, 1);	// load halfword, sign-extended
}

void rpsxLHU()
{
	recLoad32(16, 0);	// load halfword, zero-extended
}

void rpsxLW()
{
	recLoad32(32, 0);	// load word
}
extern void rpsxMemConstClear(u32 mem);
// check if mem is executable, and clear it
//
// Entry: ECX = physical address that was just written (set up by recStore's
// generated code).  Looks up psxRecLUT[mem>>16]; if a recompiled page exists,
// computes the block entry at recLUT-page + 2*(mem & 0xfffc) and, if a block
// is present there, calls psxRecClearMem with the entry (stack-passed, cdecl).
__declspec(naked) void rpsxWriteMemClear()
{
	_asm {
		// edx = (mem >> 16) * 4, i.e. the byte offset into psxRecLUT
		mov edx, ecx
		shr edx, 14
		and dl, 0xfc
		add edx, psxRecLUT
		test dword ptr [edx], 0xffffffff
		jnz Clear32
		ret

Clear32:
		// recLUT[mem>>16] + (mem&0xfffc)
		mov edx, dword ptr [edx]
		mov eax, ecx
		and eax, 0xfffc

		// edx += 2*eax
		shl eax, 1
		add edx, eax

		// no block recompiled at this address? nothing to clear.
		cmp dword ptr [edx], 0
		je ClearRet

		sub esp, 4
		mov dword ptr [esp], edx
		call psxRecClearMem
		add esp, 4

ClearRet:
		ret
	}
}
extern u32 s_psxBlockCycles;
// Emits native code for an IOP store of the given width: [Rs + Imm] = Rt.
//
// RAM-range stores are skipped while g_psxWriteOk is zero.  RAM writes land
// in the PS2MEM_PSX_ mirror and then (if the address is below g_psxMaxRecMem)
// call rpsxWriteMemClear (ECX = address) to invalidate recompiled blocks at
// the target; hardware addresses divert to the psxRecMemWrite* handlers with
// the value in EAX.  Const-address stores use the psxRecMemConstWrite* helpers.
void recStore(int bit)
{
#ifdef REC_SLOWWRITE
	_psxFlushConstReg(_Rs_);
#else
	if( PSX_IS_CONST1( _Rs_ ) ) {
		u8* pjmpok;
		u32 addr = g_psxConstRegs[_Rs_]+_Imm_;
		int doclear = 0;

		// Bit 28 clear => RAM range: honor the g_psxWriteOk gate.
		if( !(addr & 0x10000000) ) {
			// check g_psxWriteOk
			CMP32ItoM((uptr)&g_psxWriteOk, 0);
			pjmpok = JE8(0);
		}

		switch(bit) {
			case 8:
				// Rt const too? pass it tagged; otherwise move it into EAX first.
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite8(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite8(addr, EAX);
				}
				break;
			case 16:
				assert( (addr)%2 == 0 );
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite16(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite16(addr, EAX);
				}
				break;
			case 32:
				assert( (addr)%4 == 0 );
				if( PSX_IS_CONST1(_Rt_) ) doclear = psxRecMemConstWrite32(addr, MEM_PSXCONSTTAG|(_Rt_<<16));
				else {
					_psxMoveGPRtoR(EAX, _Rt_);
					doclear = psxRecMemConstWrite32(addr, EAX);
				}
				break;
		}

		if( !(addr & 0x10000000) ) {
			// Invalidate any recompiled code at the (word-aligned) address.
			if( doclear ) rpsxMemConstClear((addr)&~3);
			x86SetJ8(pjmpok);
		}
	}
	else
#endif
	{
		int dohw;
		int mmregs = _psxPrepareReg(_Rs_);
		dohw = rpsxSetMemLocation(_Rs_, mmregs);

		CMP32ItoM((uptr)&g_psxWriteOk, 0);
		u8* pjmpok = JE8(0);

		// RAM store into the PS2MEM_PSX_ mirror (const or register value).
		if( PSX_IS_CONST1( _Rt_ ) ) {
			switch(bit) {
				case 8: MOV8ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
				case 16: MOV16ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
				case 32: MOV32ItoRmOffset(ECX, g_psxConstRegs[_Rt_], PS2MEM_PSX_+s_nAddMemOffset); break;
			}
		}
		else {
			switch(bit) {
				case 8:
					MOV8MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV8RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
				case 16:
					MOV16MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV16RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
				case 32:
					MOV32MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]);
					MOV32RtoRmOffset(ECX, EAX, PS2MEM_PSX_+s_nAddMemOffset);
					break;
			}
		}

		// Only bother clearing recompiled blocks below g_psxMaxRecMem.
		if( s_nAddMemOffset ) ADD32ItoR(ECX, s_nAddMemOffset);
		CMP32MtoR(ECX, (uptr)&g_psxMaxRecMem);
		j8Ptr[1] = JAE8(0);

		if( bit < 32 ) AND8ItoR(ECX, 0xfc);		// word-align the clear address
		CALLFunc((u32)rpsxWriteMemClear);

		if( dohw ) {
			j8Ptr[2] = JMP8(0);

			SET_HWLOC_R3000A();

			// Hardware write: value goes in EAX, address in ECX.
			if( PSX_IS_CONST1(_Rt_) ) {
				switch(bit) {
					case 8: MOV8ItoR(EAX, g_psxConstRegs[_Rt_]); break;
					case 16: MOV16ItoR(EAX, g_psxConstRegs[_Rt_]); break;
					case 32: MOV32ItoR(EAX, g_psxConstRegs[_Rt_]); break;
				}
			}
			else {
				switch(bit) {
					case 8: MOV8MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
					case 16: MOV16MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
					case 32: MOV32MtoR(EAX, (int)&psxRegs.GPR.r[ _Rt_ ]); break;
				}
			}

			if( s_nAddMemOffset != 0 ) ADD32ItoR(ECX, s_nAddMemOffset);

			// some type of hardware write
			switch(bit) {
				case 8: CALLFunc( (int)psxRecMemWrite8 ); break;
				case 16: CALLFunc( (int)psxRecMemWrite16 ); break;
				case 32: CALLFunc( (int)psxRecMemWrite32 ); break;
			}

			x86SetJ8(j8Ptr[2]);
		}

		x86SetJ8(j8Ptr[1]);
		x86SetJ8(pjmpok);
	}
}
// Opcode-level store dispatchers: each forwards to the shared recStore
// emitter with the operand width.
void rpsxSB()
{
	recStore(8);
}

void rpsxSH()
{
	recStore(16);
}

void rpsxSW()
{
	recStore(32);
}
REC_FUNC(LWL);
REC_FUNC(LWR);
REC_FUNC(SWL);
REC_FUNC(SWR);
#else
// TLB loadstore functions
REC_FUNC(LWL);
REC_FUNC(LWR);
REC_FUNC(SWL);
REC_FUNC(SWR);
using namespace x86Emitter;
static void rpsxLB()
{
_psxDeleteReg(_Rs_, 1);
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead8, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
if (_Rt_) {
MOVSX32R8toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -922,9 +624,9 @@ static void rpsxLBU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead8, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
if (_Rt_) {
MOVZX32R8toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -938,9 +640,9 @@ static void rpsxLH()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead16, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
if (_Rt_) {
MOVSX32R16toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -954,9 +656,9 @@ static void rpsxLHU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg1((uptr)iopMemRead16, X86ARG1|MEM_X86TAG, 0);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
if (_Rt_) {
MOVZX32R16toR(EAX, EAX);
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
@ -971,13 +673,13 @@ static void rpsxLW()
_psxDeleteReg(_Rt_, 0);
_psxFlushCall(FLUSH_EVERYTHING);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
TEST32ItoR(X86ARG1, 0x10000000);
TEST32ItoR(ECX, 0x10000000);
j8Ptr[0] = JZ8(0);
_callFunctionArg1((uptr)iopMemRead32, X86ARG1|MEM_X86TAG, 0);
xCALL( iopMemRead32 ); // returns value in EAX
if (_Rt_) {
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
}
@ -985,11 +687,11 @@ static void rpsxLW()
x86SetJ8(j8Ptr[0]);
// read from psM directly
AND32ItoR(X86ARG1, 0x1fffff);
ADD32ItoR(X86ARG1, (uptr)psxM);
AND32ItoR(ECX, 0x1fffff);
ADD32ItoR(ECX, (uptr)psxM);
MOV32RmtoR( X86ARG1, X86ARG1 );
MOV32RtoM( (uptr)&psxRegs.GPR.r[_Rt_], X86ARG1);
MOV32RmtoR( ECX, ECX );
MOV32RtoM( (uptr)&psxRegs.GPR.r[_Rt_], ECX);
x86SetJ8(j8Ptr[1]);
PSX_DEL_CONST(_Rt_);
@ -1000,9 +702,10 @@ static void rpsxSB()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite8, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite8 );
}
static void rpsxSH()
@ -1010,9 +713,10 @@ static void rpsxSH()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite16, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite16 );
}
static void rpsxSW()
@ -1020,13 +724,12 @@ static void rpsxSW()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(X86ARG1, _Imm_);
_callFunctionArg2((uptr)iopMemWrite32, X86ARG1|MEM_X86TAG, MEM_MEMORYTAG, 0, (uptr)&psxRegs.GPR.r[_Rt_]);
MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
if (_Imm_) ADD32ItoR(ECX, _Imm_);
xMOV( edx, &psxRegs.GPR.r[_Rt_] );
xCALL( iopMemWrite32 );
}
#endif // end load store
//// SLL
void rpsxSLL_const()
{

View File

@ -1,57 +0,0 @@
// iR5900.c assembly routines
//
// Hand-written dispatcher stubs for the EE (R5900) recompiler.  These are the
// glue between C code and recompiled blocks: each one resolves the current pc
// through the block lookup table (RECLUT) and jumps into generated code.

.intel_syntax noprefix

//////////////////////////////////////////////////////////////////////////
// Note that iR3000A.S and iR5900.S asm code is now identical. Only some
// function names and the following two defines should ever differ:
#define REGINFO cpuRegs
#define RECLUT recLUT
#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup

//////////////////////////////////////////////////////////////////////////
// Preprocessor Mess!

.extern REGINFO
.extern RECLUT
.extern recRecompile
.extern recEventTest

//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
.global JITCompile
JITCompile:
	// recRecompile is cdecl: pc is pushed on the stack and cleaned up here.
	mov esi, dword ptr [REGINFO + PCOFFSET]
	push esi
	call recRecompile
	add esp, 4

	// Dispatch to the freshly-compiled block: RECLUT[pc>>16] + pc.
	mov ebx, esi
	shr esi, 16
	mov ecx, dword ptr [RECLUT+esi*4]
	jmp dword ptr [ecx+ebx]

.global JITCompileInBlock
JITCompileInBlock:
	jmp JITCompile

//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.
.globl DispatcherReg
DispatcherReg:
	mov eax, dword ptr [REGINFO + PCOFFSET]
	mov ebx, eax
	shr eax, 16
	mov ecx, dword ptr [RECLUT+eax*4]
	jmp dword ptr [ecx+ebx]

.globl DispatcherEvent
DispatcherEvent:
	// Run the event test, then fall into the normal register dispatcher.
	call recEventTest
	jmp DispatcherReg

View File

@ -236,7 +236,8 @@ void _flushConstRegs()
int _allocX86reg(int x86reg, int type, int reg, int mode)
{
int i;
assert( reg >= 0 && reg < 32 );
pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." );
pxAssertDev( x86reg != ESP && x86reg != EBP, "Allocation of ESP/EBP is not allowed!" );
// don't alloc EAX and ESP,EBP if MODE_NOFRAME
int oldmode = mode;
@ -448,15 +449,11 @@ void _freeX86reg(int x86reg)
x86regs[x86reg].inuse = 0;
}
void _freeX86regs() {
int i;
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse) continue;
void _freeX86regs()
{
for (int i=0; i<iREGCNT_GPR; i++)
_freeX86reg(i);
}
}
// MMX Caching
_mmxregs mmxregs[8], s_saveMMXregs[8];
@ -863,88 +860,6 @@ void SetFPUstate() {
}
}
// Emits a PUSH of one 32-bit argument for a cdecl-style call.  'arg' is a
// tagged descriptor (the IS_* macros test the tag in its upper bits) that
// selects the source class; 'argmem' supplies the immediate/memory operand
// for the constant and memory cases.  XMM/MMX/GPR-cache sources are stored
// to a manually-opened stack slot since they cannot be PUSHed directly.
__forceinline void _callPushArg(u32 arg, uptr argmem)
{
	if( IS_X86REG(arg) ) {
		// plain x86 register: push directly
		PUSH32R(arg&0xff);
	}
	else if( IS_CONSTREG(arg) ) {
		// immediate constant, value carried in argmem
		PUSH32I(argmem);
	}
	else if( IS_GPRREG(arg) ) {
		SUB32ItoR(ESP, 4);
		_eeMoveGPRtoRm(ESP, arg&0xff);
	}
	else if( IS_XMMREG(arg) ) {
		SUB32ItoR(ESP, 4);
		SSEX_MOVD_XMM_to_Rm(ESP, arg&0xf);
	}
	else if( IS_MMXREG(arg) ) {
		SUB32ItoR(ESP, 4);
		MOVD32MMXtoRm(ESP, arg&0xf);
	}
	else if( IS_EECONSTREG(arg) ) {
		// EE register with a known-constant value: push the cached low word
		PUSH32I(g_cpuConstRegs[(arg>>16)&0x1f].UL[0]);
	}
	else if( IS_PSXCONSTREG(arg) ) {
		PUSH32I(g_psxConstRegs[(arg>>16)&0x1f]);
	}
	else if( IS_MEMORYREG(arg) ) {
		PUSH32M(argmem);
	}
	else {
		assert( (arg&0xfff0) == 0 );
		// assume it is a GPR reg
		PUSH32R(arg&0xf);
	}
}
// Emits a one-argument cdecl call: push the arg, call fn, pop 4 bytes.
__forceinline void _callFunctionArg1(uptr fn, u32 arg1, uptr arg1mem)
{
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 4);	// caller cleans the stack (cdecl)
}
// Emits a two-argument cdecl call; args are pushed right-to-left.
__forceinline void _callFunctionArg2(uptr fn, u32 arg1, u32 arg2, uptr arg1mem, uptr arg2mem)
{
	_callPushArg(arg2, arg2mem);
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 8);	// caller cleans the stack (cdecl)
}
// Emits a three-argument cdecl call; args are pushed right-to-left.
__forceinline void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr arg2mem, uptr arg3mem)
{
	_callPushArg(arg3, arg3mem);
	_callPushArg(arg2, arg2mem);
	_callPushArg(arg1, arg1mem);
	CALLFunc((uptr)fn);
	ADD32ItoR(ESP, 12);	// caller cleans the stack (cdecl)
}
// Pushes the value described by a tagged register descriptor onto the x86
// stack: XMM/MMX sources are stored through a manually-opened slot, const-
// tagged EE/PSX regs push their cached immediate, and anything else is
// assumed to be a plain x86 GPR index.
void _recPushReg(int mmreg)
{
	if( IS_XMMREG(mmreg) ) {
		SUB32ItoR(ESP, 4);
		SSEX_MOVD_XMM_to_Rm(ESP, mmreg&0xf);
	}
	else if( IS_MMXREG(mmreg) ) {
		SUB32ItoR(ESP, 4);
		MOVD32MMXtoRm(ESP, mmreg&0xf);
	}
	else if( IS_EECONSTREG(mmreg) ) {
		PUSH32I(g_cpuConstRegs[(mmreg>>16)&0x1f].UL[0]);
	}
	else if( IS_PSXCONSTREG(mmreg) ) {
		PUSH32I(g_psxConstRegs[(mmreg>>16)&0x1f]);
	}
	else {
		assert( (mmreg&0xfff0) == 0 );
		PUSH32R(mmreg);
	}
}
void _signExtendSFtoM(u32 mem)
{
LAHF();

View File

@ -79,8 +79,7 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static u32 *recRAMCopy = NULL;
void JITCompile();
static BaseBlocks recBlocks((uptr)JITCompile);
static BaseBlocks recBlocks;
static u8* recPtr = NULL;
static u32 *recConstBufPtr = NULL;
EEINST* s_pInstCache = NULL;
@ -310,6 +309,200 @@ u32* recGetImm64(u32 hi, u32 lo)
return imm64;
}
// =====================================================================================================
// R5900 Dispatchers
// =====================================================================================================
static void __fastcall recRecompile( const u32 startpc );
static u32 g_lastpc = 0;
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned eeRecDispatchers[0x1000];
typedef void DynGenFunc();
static DynGenFunc* DispatcherEvent = NULL;
static DynGenFunc* DispatcherReg = NULL;
static DynGenFunc* JITCompile = NULL;
static DynGenFunc* JITCompileInBlock = NULL;
static DynGenFunc* EnterRecompiledCode = NULL;
static DynGenFunc* ExitRecompiledCode = NULL;
// Event-test entry point invoked from generated dispatcher code: runs the
// shared branch/event test.  The asserts (dev builds) verify that no XMM/MMX
// registers are left in a saved/frozen state across the call.
static void recEventTest()
{
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
	_cpuBranchTest_Shared();
	pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
}
// __fastcall handler called from generated code when the dev-build stackframe
// sanity check (see _DynGen_StackFrameCheck) finds ESP or EBP diverging from
// the values saved at recompiler entry.
//
// parameters:
//   espORebp - 0 for ESP, or 1 for EBP.
//   regval   - current value of the register at the time the fault was detected (predates the
//     stackframe setup code in this function)
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	pxFailDev( wxsFormat( L"(R5900 Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
	// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
	// tinue to run with some degree of stability.
}
// Emits a dev-build-only sanity check comparing EBP/ESP against the values
// recorded at recompiler entry (s_store_ebp / s_store_esp).  On mismatch it
// calls StackFrameCheckFailed (__fastcall: ecx = which register, edx = current
// value) and then reloads the register from the saved copy as a best-effort
// recovery.  Emits nothing in non-dev builds.
static void _DynGen_StackFrameCheck()
{
	if( !IsDevBuild ) return;

	// --------- EBP Here -----------

	xCMP( ebp, &s_store_ebp );
	xForwardJE8 skipassert_ebp;
	xMOV( ecx, 1 );						// 1 specifies EBP
	xMOV( edx, ebp );
	xCALL( StackFrameCheckFailed );
	xMOV( ebp, &s_store_ebp );			// half-hearted frame recovery attempt!
	skipassert_ebp.SetTarget();

	// --------- ESP There -----------

	xCMP( esp, &s_store_esp );
	xForwardJE8 skipassert_esp;
	xXOR( ecx, ecx );					// 0 specifies ESP
	xMOV( edx, esp );
	xCALL( StackFrameCheckFailed );
	xMOV( esp, &s_store_esp );			// half-hearted frame recovery attempt!
	skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
//
// Must be generated *after* DispatcherReg (asserted below) since both use the
// same dispatch tail.  Fix: the assert message previously misspelled
// "JITCompile" as "JITComple".
static DynGenFunc* _DynGen_JITCompile()
{
	pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITCompile. Thanks." );

	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	// recRecompile is __fastcall: the pc is passed in ECX.
	xMOV( ecx, &cpuRegs.pc );
	xCALL( recRecompile );

	// Dispatch to the freshly-compiled block: recLUT[pc>>16] + pc.
	xMOV( eax, &cpuRegs.pc );
	xMOV( ebx, eax );
	xSHR( eax, 16 );
	xMOV( ecx, ptr[recLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Generates the in-block variant of the JIT stub: a plain jump to the
// JITCompile dispatcher (used for blocks cleared while executing).
static DynGenFunc* _DynGen_JITCompileInBlock()
{
	u8* retval = xGetPtr();
	xJMP( JITCompile );
	return (DynGenFunc*)retval;
}
// called when jumping to variable pc address
// Generates the register dispatcher: looks up recLUT[pc>>16] and jumps to the
// block entry at (LUT + pc).  Dev builds prepend the stackframe check.
static DynGenFunc* _DynGen_DispatcherReg()
{
	u8* retval = xGetPtr();
	_DynGen_StackFrameCheck();

	xMOV( eax, &cpuRegs.pc );
	xMOV( ebx, eax );			// ebx keeps the full pc as the LUT byte offset
	xSHR( eax, 16 );
	xMOV( ecx, ptr[recLUT + (eax*4)] );
	xJMP( ptr32[ecx+ebx] );

	return (DynGenFunc*)retval;
}
// Generates the entry thunk that transitions from C++ into recompiled EE
// code, and (as a side effect) also generates ExitRecompiledCode, the
// cleanup/return path.  Builds an aligned EBP stackframe plus a simulated
// CALL frame so debuggers can unwind through generated code.
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
	u8* retval = xGetPtr();

	// "standard" frame pointer setup for aligned stack: Record the original
	// esp into ebp, and then align esp. ebp references the original esp base
	// for the duration of our function, and is used to restore the original
	// esp before returning from the function

	// Optimization: We "allocate" 0x10 bytes of stack ahead of time here, which we can
	// use for supplying parameters to cdecl functions.

	xPUSH( ebp );
	xMOV( ebp, esp );
	xAND( esp, -0x10 );

	// First 0x10 is for esi, edi, etc. Second 0x10 is for the return address and ebp. The
	// third 0x10 is for C-style CDECL calls we might make from the recompiler
	// (parameters for those calls can be stored there!)

	xSUB( esp, 0x30 );

	// Spill the callee-saved regs into the frame.
	xMOV( ptr[ebp-12], edi );
	xMOV( ptr[ebp-8], esi );
	xMOV( ptr[ebp-4], ebx );

	// Simulate a CALL function by pushing the call address and EBP onto the stack.
	// 0xffeeff is a placeholder "return address"; 'imm' records its location
	// so it can be patched (below) to point at the cleanup code.
	xMOV( ptr32[esp+0x1c], 0xffeeff );
	uptr& imm = *(uptr*)(xGetPtr()-4);

	// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
	xMOV( ptr32[esp+0x18], ebp );
	xLEA( ebp, ptr32[esp+0x18] );

	// Record the entry-time ESP/EBP for _DynGen_StackFrameCheck.
	xMOV( &s_store_esp, esp );
	xMOV( &s_store_ebp, ebp );

	xJMP( ptr32[&DispatcherReg] );
	imm = (uptr)xGetPtr();		// patch the fake return address to the cleanup code

	ExitRecompiledCode = (DynGenFunc*)xGetPtr();

	// pop the simulated call frame, restore callee-saved regs, then pop the
	// real frame and return to the C++ caller.
	xLEAVE();

	xMOV( edi, ptr[ebp-12] );
	xMOV( esi, ptr[ebp-8] );
	xMOV( ebx, ptr[ebp-4] );

	xLEAVE();
	xRET();

	return (DynGenFunc*)retval;
}
// Generates all EE dispatcher stubs into the page-aligned eeRecDispatchers
// buffer and then write-protects the page for the life of the process.
static void _DynGen_Dispatchers()
{
	// In case init gets called multiple times:
	HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false );

	// clear the buffer to 0xcc (easier debugging).
	memset_8<0xcc,0x1000>( eeRecDispatchers );

	xSetPtr( eeRecDispatchers );

	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
	// most and stand to benefit from strong alignment and direct referencing.
	// Note: DispatcherEvent is just the CALL below; it falls straight through
	// into DispatcherReg, which is generated immediately after.
	DispatcherEvent = (DynGenFunc*)xGetPtr();
	xCALL( recEventTest );
	DispatcherReg = _DynGen_DispatcherReg();

	JITCompile = _DynGen_JITCompile();
	JITCompileInBlock = _DynGen_JITCompileInBlock();
	EnterRecompiledCode = _DynGen_EnterRecompiledCode();

	HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true );

	// Cleared blocks get re-routed through the freshly generated JIT stub.
	recBlocks.SetJITCompile( JITCompile );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
static const int REC_CACHEMEM = 0x01000000;
@ -377,6 +570,7 @@ static void recAlloc()
// No errors.. Proceed with initialization:
ProfilerRegisterSource( "EERec", recMem, REC_CACHEMEM+0x1000 );
_DynGen_Dispatchers();
x86FpuState = FPU_STATE;
}
@ -479,141 +673,85 @@ void recStep( void )
{
}
extern "C"
#ifndef PCSX2_SEH
// <--- setjmp/longjmp model <---
#include "GS.h"
#include "System/SysThreads.h"
static void StateThreadCheck_LongJmp()
{
void recEventTest()
{
#ifdef PCSX2_DEVBUILD
// don't remove this check unless doing an official release
if( g_globalXMMSaved || g_globalMMXSaved)
{
DevCon.Error("PCSX2 Foopah! Frozen regs have not been restored!!!");
DevCon.Error("g_globalXMMSaved = %d,g_globalMMXSaved = %d", g_globalXMMSaved, g_globalMMXSaved);
}
assert( !g_globalXMMSaved && !g_globalMMXSaved);
#endif
setjmp( SetJmp_StateCheck );
// Perform counters, interrupts, and IOP updates:
_cpuBranchTest_Shared();
int oldstate;
#ifdef PCSX2_DEVBUILD
assert( !g_globalXMMSaved && !g_globalMMXSaved);
#endif
}
}
// Important! Most of the console logging and such has cancel points in it. This is great
// in Windows, where SEH lets us safely kill a thread from anywhere we want. This is bad
// in Linux, which cannot have a C++ exception cross the recompiler. Hence the changing
// of the cancelstate here!
////////////////////////////////////////////////////
static u32 g_lastpc = 0;
#ifdef _MSC_VER
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static __naked void JITCompile()
{
__asm {
mov esi, dword ptr [cpuRegs.pc]
push esi
call recRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [recLUT+esi*4]
jmp dword ptr [ecx+ebx]
pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, &oldstate );
mtgsThread.RethrowException();
SysCoreThread::Get().StateCheckInThread();
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
}
}
// Trampoline used for addresses that land inside an uncompiled block: simply
// re-enters JITCompile, which recompiles at the current cpuRegs.pc and then
// dispatches into the freshly generated code.
static __naked void JITCompileInBlock()
{
	__asm {
		jmp JITCompile
	}
}
// called when jumping to a variable pc address -- looks up the recompiled block
// for cpuRegs.pc in the two-level recLUT table and tail-jumps into it (cleared
// blocks map to the JITCompile stub, per the comment above).
static void __naked DispatcherReg()
{
	__asm {
		mov eax, dword ptr [cpuRegs.pc]		// eax = current EE program counter
		mov ebx, eax						// ebx = full pc (low bits index within the page)
		shr eax, 16							// eax = upper 16 bits select the recLUT page
		mov ecx, dword ptr [recLUT+eax*4]	// ecx = block table for this 64k region
		jmp dword ptr [ecx+ebx]				// tail-jump into the target block
	}
}
// [TODO] : Replace these functions with x86Emitter-generated code and we can compound this
// function and DispatcherReg() into a fast fall-through case (removes the DispatcherReg jump
// in this function, since execution will just fall right into the DispatcherReg implementation).
//
// Dispatcher entry used when the cycle/event target has been reached: services
// pending events via recEventTest, then resumes normal dispatch at the
// (possibly updated) cpuRegs.pc.
static void __naked DispatcherEvent()
{
	__asm
	{
		call recEventTest;	// run counters/interrupt/event tests first...
		jmp DispatcherReg;	// ...then dispatch to the block at the current pc
	}
}
#endif
static void recExecute()
{
// Implementation Notes:
// This function enters an endless loop, which is only escapable via C++ exception handling.
// The loop is needed because some things in the rec use "ret" as a shortcut to
// invoking DispatcherReg. These things are code bits which are called infrequently,
// such as dyna_block_discard and dyna_page_reset.
StateThreadCheck_LongJmp();
try
switch( setjmp( SetJmp_RecExecute ) )
{
while( true )
{
// Note: make sure the FreezeRegs boolean is reset to true here, because
// it might be set to false, depending on if the rec exits from the context of
// an EventTest or not.
case SetJmp_Exit: break;
case 0:
case SetJmp_Dispatcher:
// Typically the Dispatcher is invoked from the EventTest code, which clears
// the FreezeRegs flag, so always be sure to reset it here:
g_EEFreezeRegs = true;
try
{
#ifdef _MSC_VER
__asm
{
push ebx
push esi
push edi
call DispatcherReg
pop edi
pop esi
pop ebx
}
#else // _MSC_VER
DispatcherReg();
#endif
}
catch( Exception::ForceDispatcherReg& )
{
}
}
}
catch( Exception::ExitRecExecute& )
{
while( true )
EnterRecompiledCode();
break;
}
g_EEFreezeRegs = false;
}
namespace R5900 {
namespace Dynarec {
namespace OpcodeImpl {
#else
// ---> SEH Model --->
// SEH model: recompiled code is exited by throwing Exception::ExitRecExecute
// through the rec (safe under Win32 SEH -- see the notes above regarding Linux),
// while Exception::ForceDispatcherReg merely restarts the dispatch loop.
static void recExecute()
{
	try
	{
		for (;;)
		{
			// Typically the Dispatcher is invoked from the EventTest code, which clears
			// the FreezeRegs flag, so always be sure to reset it here:
			g_EEFreezeRegs = true;

			try
			{
				EnterRecompiledCode();
			}
			catch( Exception::ForceDispatcherReg& )
			{
				// nothing to clean up -- loop back into the recompiler.
			}
		}
	}
	catch( Exception::ExitRecExecute& )
	{
		// expected exit path; fall through to cleanup below.
	}

	g_EEFreezeRegs = false;
}
#endif
////////////////////////////////////////////////////
void recSYSCALL( void ) {
void R5900::Dynarec::OpcodeImpl::recSYSCALL( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_NODESTROY);
@ -622,13 +760,14 @@ void recSYSCALL( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
xJMP( DispatcherReg );
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
////////////////////////////////////////////////////
void recBREAK( void ) {
void R5900::Dynarec::OpcodeImpl::recBREAK( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
@ -637,13 +776,11 @@ void recBREAK( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
RET();
xJMP( DispatcherEvent );
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
} } } // end namespace R5900::Dynarec::OpcodeImpl
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count)
{
@ -703,26 +840,34 @@ void recClear(u32 addr, u32 size)
upperextent = min(upperextent, ceiling);
#ifdef PCSX2_DEVBUILD
for (int i = 0; pexblock = recBlocks[i]; i++) {
if (s_pCurBlock == PC_GETBLOCK(pexblock->startpc))
continue;
u32 blockend = pexblock->startpc + pexblock->size * 4;
if (pexblock->startpc >= addr && pexblock->startpc < addr + size * 4
|| pexblock->startpc < addr && blockend > addr) {
Console.Error( "Impossible block clearing failure" );
pxFail( "Impossible block clearing failure" );
DevCon.Error( "Impossible block clearing failure" );
pxFailDev( "Impossible block clearing failure" );
}
}
#endif
if (upperextent > lowerextent)
ClearRecLUT(PC_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
}
#ifndef PCSX2_SEH
jmp_buf SetJmp_RecExecute;
jmp_buf SetJmp_StateCheck;
#endif
// Exits the recompiler's execution loop.  Under SEH-capable builds (Win32/MSVC)
// this throws a C++ exception through the recompiled code; otherwise it longjmps
// back to recExecute's setjmp, since GCC cannot safely propagate C++ exceptions
// across recompiled code (see notes above).
static void ExitRec()
{
#ifdef PCSX2_SEH
	throw Exception::ExitRecExecute();
#else
	longjmp( SetJmp_RecExecute, SetJmp_Exit );
#endif
}
// check for end of bios
@ -730,11 +875,22 @@ void CheckForBIOSEnd()
{
xMOV( eax, &cpuRegs.pc );
xCMP( eax, 0x00200008 );
/*xCMP( eax, 0x00200008 );
xJE(ExitRec);
xCMP( eax, 0x00100008 );
xJE( ExitRec );
xJE(ExitRec);*/
xCMP( eax, 0x00200008 );
xForwardJE8 CallExitRec;
xCMP( eax, 0x00100008 );
xForwardJNE8 SkipExitRec;
CallExitRec.SetTarget();
xCALL( ExitRec );
SkipExitRec.SetTarget();
}
static int *s_pCode;
@ -833,7 +989,10 @@ void LoadBranchState()
void iFlushCall(int flushtype)
{
_freeX86regs();
// Free registers that are not saved across function calls (x86-32 ABI):
_freeX86reg(EAX);
_freeX86reg(ECX);
_freeX86reg(EDX);
if( flushtype & FLUSH_FREE_XMM )
_freeXMMregs();
@ -934,6 +1093,8 @@ static u32 eeScaleBlockCycles()
// setting "branch = 2";
static void iBranchTest(u32 newpc)
{
_DynGen_StackFrameCheck();
if( g_ExecBiosHack ) CheckForBIOSEnd();
// Check the Event scheduler if our "cycle target" has been reached.
@ -1136,8 +1297,9 @@ static void printfn()
static int curcount = 0;
const int skip = 0;
assert( !g_globalMMXSaved );
assert( !g_globalXMMSaved );
pxAssert( !g_globalMMXSaved && !g_globalXMMSaved );
//pxAssert( cpuRegs.pc != 0x80001300 );
if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
curcount++;
@ -1151,18 +1313,22 @@ static void printfn()
}
}
u32 s_recblocks[] = {0};
// Diagnostic trap invoked when a corrupted esp is detected; logs the error and
// asserts (no-op assert in release builds).
void badespfn() {
	Console.Error("Bad esp!");
	assert(0);
}
static u32 s_recblocks[] = {0};
// Called when a block under manual protection fails its pre-execution integrity check.
// Clears the recompiled block(s) covering the given range and exits back to the
// dispatcher.  start: block start address; sz: block size in 32-bit words (the
// log prints sz*4 as bytes).  Note: this function never returns normally -- it
// jumps directly to ExitRecompiledCode.
void __fastcall dyna_block_discard(u32 start,u32 sz)
{
	DevCon.WriteLn("dyna_block_discard .. start=0x%08X size=%d", start, sz*4);
	recClear(start, sz);

	// Stack trick: This function was invoked via a direct jmp, so manually pop the
	// EBP/stackframe before issuing a RET, else esp/ebp will be incorrect.

#ifdef _MSC_VER
	__asm leave __asm jmp [ExitRecompiledCode]
#else
	__asm__ __volatile__( "leave\n jmp *%[exitRec]\n" : : [exitRec] "m" (ExitRecompiledCode) : );
#endif
}
// called when a block under manual protection has been run enough times to be a
@ -1172,9 +1338,15 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
recClear(start & ~0xfffUL, 0x400);
manual_counter[start >> 12]++;
mmap_MarkCountedRamPage( start );
#ifdef _MSC_VER
__asm leave __asm jmp [ExitRecompiledCode]
#else
__asm__ __volatile__( "leave\n jmp *%[exitRec]\n" : : [exitRec] "m" (ExitRecompiledCode) : );
#endif
}
void recRecompile( const u32 startpc )
static void __fastcall recRecompile( const u32 startpc )
{
u32 i = 0;
u32 branchTo;

View File

@ -98,7 +98,7 @@ void recWritebackHILO(int info, int writed, int upper)
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE|MODE_READ);
if( regd >= 0 ) {
SSE_MOVLPS_M64_to_XMM(regd, loaddr);
regd |= 0x8000;
regd |= MEM_XMMTAG;
}
}
}

View File

@ -19,6 +19,8 @@
#include "iR5900.h"
#include "R5900OpcodeTables.h"
using namespace x86Emitter;
extern void _vu0WaitMicro();
extern void _vu0FinishMicro();
@ -311,14 +313,12 @@ static void recCTC2() {
}
else MOV32ItoM((uptr)&microVU0.regs->VI[_Rd_].UL, 0);
break;
case REG_CMSAR1:
case REG_CMSAR1: // Execute VU1 Micro SubRoutine
if (_Rt_) {
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
PUSH32R(EAX);
MOV32MtoR(ECX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
}
else PUSH32I(0);
CALLFunc((uptr)vu1ExecMicro); // Execute VU1 Micro SubRoutine
ADD32ItoR(ESP, 4);
else XOR32RtoR(ECX,ECX);
xCALL(vu1ExecMicro);
break;
case REG_FBRST:
if (!_Rt_) {

View File

@ -2628,17 +2628,11 @@ __declspec(naked) static void SuperVUEndProgram()
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx
}
#ifdef PCSX2_DEBUG
__asm
{
sub s_vu1esp, esp
}
#endif
__asm
{
call SuperVUCleanupProgram
jmp s_callstack // so returns correctly
}
@ -3630,7 +3624,8 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x400 : 0x004);
u8* ptr = JZ8(0);
OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x200 : 0x002);
_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( hwIntcIrq );
x86SetJ8(ptr);
}
if (ptr[1] & 0x08000000) // T flag
@ -3638,7 +3633,8 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x800 : 0x008);
u8* ptr = JZ8(0);
OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x400 : 0x004);
_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( hwIntcIrq );
x86SetJ8(ptr);
}
@ -4357,7 +4353,7 @@ void recVUMI_XGKICK(VURegs *VU, int info)
recVUMI_XGKICK_(VU);
}
int isreg = _allocX86reg(X86ARG2, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
int isreg = _allocX86reg(ECX, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
_freeX86reg(isreg); // flush
x86regs[isreg].inuse = 1;
x86regs[isreg].type = X86TYPE_VITEMP;

View File

@ -241,7 +241,7 @@ static void CvtPacketToFloat( StereoOut32* srcdest )
// Parameter note: Size should always be a multiple of 128, thanks!
static void CvtPacketToInt( StereoOut32* srcdest, uint size )
{
jASSUME( (size & 127) == 0 );
//jASSUME( (size & 127) == 0 );
const StereoOutFloat* src = (StereoOutFloat*)srcdest;
StereoOut32* dest = srcdest;