mirror of https://github.com/PCSX2/pcsx2.git
Merged drk||Raziel's "BTS Manual Protection" enhancement for the vtlb into /trunk, and combined it with Pseudonim's "Manual Block Clear" enhancement for an ideal two-phase protection system.
Most things should be a bit faster with this new system. The system is more balanced than the previous one, in that it provides a better overall performance across most games, but some specific FMVs (like Disgaea 2's) will be a bit slower. On the other hand, others like DQ8 and Kingdom Hearts 2 FMVs get a big speedup. Almost all in-game stuff should be either the same or faster now. Set a bunch of ignores for TortoiseSVN users, as suggested in Issue 166. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1083 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
commit
31f0be6eb8
|
@ -37,7 +37,7 @@ static const uint m_psxMemSize =
|
|||
void psxMemAlloc()
|
||||
{
|
||||
if( m_psxAllMem == NULL )
|
||||
m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096, 0x21000000 );
|
||||
m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096 );
|
||||
|
||||
if( m_psxAllMem == NULL)
|
||||
throw Exception::OutOfMemory( "psxMemAlloc > failed allocating memory for the IOP processor." );
|
||||
|
|
|
@ -618,7 +618,7 @@ static u8* m_psAllMem = NULL;
|
|||
void memAlloc()
|
||||
{
|
||||
if( m_psAllMem == NULL )
|
||||
m_psAllMem = vtlb_malloc( m_allMemSize, 4096, 0x2400000 );
|
||||
m_psAllMem = vtlb_malloc( m_allMemSize, 4096 );
|
||||
|
||||
if( m_psAllMem == NULL)
|
||||
throw Exception::OutOfMemory( "memAlloc > failed to allocate PS2's base ram/rom/scratchpad." );
|
||||
|
|
|
@ -76,6 +76,9 @@ int _SPR0chain()
|
|||
{
|
||||
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
||||
|
||||
// Clear dependent EE recompiler blocks, if necessary [needed for BTS protection system]
|
||||
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
|
||||
|
||||
// clear VU mem also!
|
||||
TestClearVUs(spr0->madr, spr0->qwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
|
||||
|
||||
|
@ -121,6 +124,7 @@ void _SPR0interleave()
|
|||
{
|
||||
// clear VU mem also!
|
||||
TestClearVUs(spr0->madr, spr0->qwc << 2);
|
||||
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
|
||||
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
||||
}
|
||||
spr0->sadr += spr0->qwc * 16;
|
||||
|
|
|
@ -168,6 +168,7 @@ bool SysAllocateMem()
|
|||
|
||||
try
|
||||
{
|
||||
vtlb_Core_Alloc();
|
||||
memAlloc();
|
||||
psxMemAlloc();
|
||||
vuMicroMemAlloc();
|
||||
|
@ -271,6 +272,7 @@ void SysShutdownMem()
|
|||
vuMicroMemShutdown();
|
||||
psxMemShutdown();
|
||||
memShutdown();
|
||||
vtlb_Core_Shutdown();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -83,7 +83,7 @@ static const uint m_vuMemSize =
|
|||
void vuMicroMemAlloc()
|
||||
{
|
||||
if( m_vuAllMem == NULL )
|
||||
m_vuAllMem = vtlb_malloc( m_vuMemSize, 16, 0x28000000 );
|
||||
m_vuAllMem = vtlb_malloc( m_vuMemSize, 16 );
|
||||
|
||||
if( m_vuAllMem == NULL )
|
||||
throw Exception::OutOfMemory( "vuMicroMemInit > Failed to allocate VUmicro memory." );
|
||||
|
|
|
@ -61,7 +61,6 @@ vtlbHandler UnmappedVirtHandler1;
|
|||
vtlbHandler UnmappedPhyHandler0;
|
||||
vtlbHandler UnmappedPhyHandler1;
|
||||
|
||||
|
||||
/*
|
||||
__asm
|
||||
{
|
||||
|
@ -87,10 +86,22 @@ callfunction:
|
|||
jmp [readfunctions8-0x800000+eax];
|
||||
}*/
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interpreter Implementations of VTLB Memory Operations.
|
||||
// See recVTLB.cpp for the dynarec versions.
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Helper for the BTS manual protection system. Sets a bit based on the given address,
|
||||
// marking that piece of PS2 memory as 'dirty.'
|
||||
//
|
||||
static void memwritebits(u8* ptr)
|
||||
{
|
||||
u32 offs=ptr-vtlbdata.alloc_base;
|
||||
offs/=16;
|
||||
vtlbdata.alloc_bits[offs/8] |= 1 << (offs%8);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline DataType __fastcall MemOp_r0(u32 addr)
|
||||
|
@ -117,6 +128,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Interpreted VTLB lookup for 64 and 128 bit accesses.
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
||||
|
@ -148,6 +160,7 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
||||
{
|
||||
|
@ -155,6 +168,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
|||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
memwritebits((u8*)ppf);
|
||||
*reinterpret_cast<DataType*>(ppf)=data;
|
||||
}
|
||||
else
|
||||
|
@ -174,6 +188,8 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
||||
{
|
||||
|
@ -182,6 +198,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
|||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
memwritebits((u8*)ppf);
|
||||
*reinterpret_cast<DataType*>(ppf)=*data;
|
||||
if (DataSize==128)
|
||||
*reinterpret_cast<DataType*>(ppf+8)=data[1];
|
||||
|
@ -202,7 +219,6 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
mem8_t __fastcall vtlb_memRead8(u32 mem)
|
||||
{
|
||||
return MemOp_r0<8,mem8_t>(mem);
|
||||
|
@ -328,7 +344,7 @@ void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::E
|
|||
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// VTLB Public API -- Init/Term/RegisterHandler stuff
|
||||
//
|
||||
|
||||
|
@ -361,6 +377,7 @@ vtlbHandler vtlb_RegisterHandler( vtlbMemR8FP* r8,vtlbMemR16FP* r16,vtlbMemR32FP
|
|||
return rv;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
|
||||
// New mappings always assume priority over previous mappings, so place "generic" mappings for
|
||||
// large areas of memory first, and then specialize specific small regions of memory afterward.
|
||||
|
@ -500,7 +517,8 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz)
|
|||
}
|
||||
}
|
||||
|
||||
// Clears vtlb handlers and memory mappings.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// vtlb_init -- Clears vtlb handlers and memory mappings.
|
||||
void vtlb_Init()
|
||||
{
|
||||
vtlbHandlerCount=0;
|
||||
|
@ -540,7 +558,8 @@ void vtlb_Init()
|
|||
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Performs a COP0-level reset of the PS2's TLB.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// vtlb_Reset -- Performs a COP0-level reset of the PS2's TLB.
|
||||
// This function should probably be part of the COP0 rather than here in VTLB.
|
||||
void vtlb_Reset()
|
||||
{
|
||||
|
@ -552,30 +571,65 @@ void vtlb_Term()
|
|||
//nothing to do for now
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Reserves the vtlb core allocation used by various emulation components!
|
||||
//
|
||||
void vtlb_Core_Alloc()
|
||||
{
|
||||
if( vtlbdata.alloc_base != NULL ) return;
|
||||
|
||||
vtlbdata.alloc_current = 0;
|
||||
|
||||
#ifdef __LINUX__
|
||||
vtlbdata.alloc_base = SysMmapEx( 0x16000000, VTLB_ALLOC_SIZE, 0x80000000, "Vtlb" );
|
||||
#else
|
||||
// Win32 just needs this, since malloc always maps below 2GB.
|
||||
vtlbdata.alloc_base = (u8*)_aligned_malloc( VTLB_ALLOC_SIZE, 4096 );
|
||||
if( vtlbdata.alloc_base == NULL )
|
||||
throw Exception::OutOfMemory( "Fatal Error: could not allocate 42Meg buffer for PS2's mappable system ram." );
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
void vtlb_Core_Shutdown()
|
||||
{
|
||||
if( vtlbdata.alloc_base == NULL ) return;
|
||||
|
||||
#ifdef __LINUX__
|
||||
SafeSysMunmap( vtlbdata.alloc_base, VTLB_ALLOC_SIZE );
|
||||
#else
|
||||
// Make sure and unprotect memory first, since CrtDebug will try to write to it.
|
||||
HostSys::MemProtect( vtlbdata.alloc_base, VTLB_ALLOC_SIZE, Protect_ReadWrite );
|
||||
safe_aligned_free( vtlbdata.alloc_base );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// This function allocates memory blocks which are compatible with the Vtlb's requirements
|
||||
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
|
||||
// pointer to be cleared. Some operating systems and/or implementations of malloc do that,
|
||||
// but others do not. So use this instead to allocate the memory correctly for your
|
||||
// platform.
|
||||
u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
|
||||
//
|
||||
u8* vtlb_malloc( uint size, uint align )
|
||||
{
|
||||
#ifdef __LINUX__
|
||||
return SysMmapEx( tryBaseAddress, size, 0x80000000, "Vtlb" );
|
||||
#else
|
||||
// Win32 just needs this, since malloc always maps below 2GB.
|
||||
return (u8*)_aligned_malloc(size, align);
|
||||
#endif
|
||||
vtlbdata.alloc_current += align-1;
|
||||
vtlbdata.alloc_current &= ~(align-1);
|
||||
|
||||
int rv = vtlbdata.alloc_current;
|
||||
vtlbdata.alloc_current += size;
|
||||
return &vtlbdata.alloc_base[rv];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
void vtlb_free( void* pmem, uint size )
|
||||
{
|
||||
if( pmem == NULL ) return;
|
||||
|
||||
#ifdef __LINUX__
|
||||
SafeSysMunmap( pmem, size );
|
||||
#else
|
||||
// Make sure and unprotect memory first, since CrtDebug will try to write to it.
|
||||
HostSys::MemProtect( pmem, size, Protect_ReadWrite );
|
||||
safe_aligned_free( pmem );
|
||||
#endif
|
||||
// Does nothing anymore! Alloc/dealloc is now handled by vtlb_Core_Alloc /
|
||||
// vtlb_Core_Shutdown. Placebo is left in place in case it becomes useful again
|
||||
// at a later date.
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
11
pcsx2/vtlb.h
11
pcsx2/vtlb.h
|
@ -23,10 +23,12 @@ typedef void __fastcall vtlbMemW128FP(u32 addr,const mem128_t* data);
|
|||
|
||||
typedef u32 vtlbHandler;
|
||||
|
||||
extern void vtlb_Core_Alloc();
|
||||
extern void vtlb_Core_Shutdown();
|
||||
extern void vtlb_Init();
|
||||
extern void vtlb_Reset();
|
||||
extern void vtlb_Term();
|
||||
extern u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress );
|
||||
extern u8* vtlb_malloc( uint size, uint align );
|
||||
extern void vtlb_free( void* pmem, uint size );
|
||||
|
||||
|
||||
|
@ -67,6 +69,8 @@ extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
|
|||
|
||||
namespace vtlb_private
|
||||
{
|
||||
static const uint VTLB_ALLOC_SIZE = 0x2900000; //this is a bit more than required
|
||||
|
||||
static const uint VTLB_PAGE_BITS = 12;
|
||||
static const uint VTLB_PAGE_MASK = 4095;
|
||||
static const uint VTLB_PAGE_SIZE = 4096;
|
||||
|
@ -77,6 +81,11 @@ namespace vtlb_private
|
|||
|
||||
struct MapData
|
||||
{
|
||||
u8 alloc_bits[VTLB_ALLOC_SIZE/16/8];
|
||||
|
||||
u8* alloc_base; //base of the memory array
|
||||
int alloc_current; //current base
|
||||
|
||||
s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
||||
s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
||||
|
||||
|
|
|
@ -2883,7 +2883,7 @@
|
|||
</Filter>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Dynarec Emitter"
|
||||
Name="x86Emitter"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\ix86.cpp"
|
||||
|
|
|
@ -202,10 +202,8 @@ void WinRun()
|
|||
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
|
||||
|
||||
|
||||
#ifndef _DEBUG
|
||||
if( Config.Profiler )
|
||||
ProfilerInit();
|
||||
#endif
|
||||
|
||||
InitCPUTicks();
|
||||
|
||||
|
@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||
SaveConfig();
|
||||
break;
|
||||
|
||||
#ifndef _DEBUG
|
||||
case ID_PROFILER:
|
||||
Config.Profiler = !Config.Profiler;
|
||||
if( Config.Profiler )
|
||||
|
@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||
}
|
||||
SaveConfig();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
|
||||
|
@ -989,9 +985,7 @@ void CreateMainMenu() {
|
|||
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
|
||||
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
|
||||
ADDSEPARATOR(0);
|
||||
#ifndef _DEBUG
|
||||
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
|
||||
#endif
|
||||
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
|
||||
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
|
||||
ADDSEPARATOR(0);
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
//
|
||||
// Generated from the TEXTINCLUDE 2 resource.
|
||||
//
|
||||
#include "afxresmw.h"
|
||||
#include "afxresmw.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#undef APSTUDIO_READONLY_SYMBOLS
|
||||
|
||||
|
@ -899,7 +900,8 @@ END
|
|||
//
|
||||
// Generated from the TEXTINCLUDE 3 resource.
|
||||
//
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#endif // not APSTUDIO_INVOKED
|
||||
|
||||
|
|
|
@ -418,6 +418,9 @@ static void recAlloc()
|
|||
x86FpuState = FPU_STATE;
|
||||
}
|
||||
|
||||
PCSX2_ALIGNED16( static u16 manual_page[Ps2MemSize::Base >> 12] );
|
||||
PCSX2_ALIGNED16( static u8 manual_counter[Ps2MemSize::Base >> 12] );
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recResetEE( void )
|
||||
{
|
||||
|
@ -427,6 +430,8 @@ void recResetEE( void )
|
|||
|
||||
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
|
||||
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
|
||||
memzero_obj( manual_page );
|
||||
memzero_obj( manual_counter );
|
||||
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
|
||||
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
|
||||
|
||||
|
@ -720,7 +725,6 @@ static void ClearRecLUT(BASEBLOCK* base, int count)
|
|||
base[i].SetFnptr((uptr)JITCompile);
|
||||
}
|
||||
|
||||
// Returns the offset to the next instruction after any cleared memory
|
||||
void recClear(u32 addr, u32 size)
|
||||
{
|
||||
BASEBLOCKEX* pexblock;
|
||||
|
@ -1256,14 +1260,16 @@ void badespfn() {
|
|||
|
||||
void __fastcall dyna_block_discard(u32 start,u32 sz)
|
||||
{
|
||||
DevCon::WriteLn("dyna_block_discard %08X , count %d", params start,sz);
|
||||
Cpu->Clear(start,sz);
|
||||
DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz);
|
||||
Cpu->Clear(start, sz);
|
||||
}
|
||||
|
||||
void __fastcall dyna_block_reset(u32 start,u32 sz)
|
||||
|
||||
void __fastcall dyna_page_reset(u32 start,u32 sz)
|
||||
{
|
||||
DevCon::WriteLn("dyna_block_reset %08X , count %d", params start,sz);
|
||||
DevCon::WriteLn("dyna_page_reset .. start=%08X count=%d", params start,sz);
|
||||
Cpu->Clear(start & ~0xfffUL, 0x400);
|
||||
manual_counter[start >> 10]++;
|
||||
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
|
||||
}
|
||||
|
||||
|
@ -1490,98 +1496,6 @@ StartRecomp:
|
|||
// instruction being analyzed.
|
||||
if( usecop2 ) vucycle++;
|
||||
|
||||
// peephole optimizations //
|
||||
#ifdef PCSX2_VM_COISSUE
|
||||
if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) {
|
||||
u32 curcode = cpuRegs.code;
|
||||
u32 nextcode = *(u32*)PSM(i+4);
|
||||
if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] != NULL ) {
|
||||
|
||||
// rs has to be the same, and cannot be just written
|
||||
if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) {
|
||||
|
||||
if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) {
|
||||
// see how many stores there are
|
||||
u32 j;
|
||||
// use xmmregs since only supporting lwc1,lq,swc1,sq
|
||||
for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) {
|
||||
u32 nncode = *(u32*)PSM(j);
|
||||
if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
|
||||
_eeLoadWritesRs(nncode))
|
||||
break;
|
||||
}
|
||||
|
||||
if( j > i+8 ) {
|
||||
u32 num = (j-i)>>2; // number of stores that can coissue
|
||||
assert( num <= iREGCNT_XMM );
|
||||
|
||||
g_pCurInstInfo[0].numpeeps = num-1;
|
||||
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
|
||||
|
||||
while(i < j-4) {
|
||||
g_pCurInstInfo++;
|
||||
g_pCurInstInfo[0].info |= EEINSTINFO_NOREC;
|
||||
i += 4;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// fall through
|
||||
}
|
||||
|
||||
// unaligned loadstores
|
||||
|
||||
// if LWL, check if LWR and that offsets are +3 away
|
||||
switch(curcode >> 26) {
|
||||
case 0x22: // LWL
|
||||
if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode )
|
||||
continue;
|
||||
break;
|
||||
case 0x26: // LWR
|
||||
if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 )
|
||||
continue;
|
||||
break;
|
||||
|
||||
case 0x2a: // SWL
|
||||
if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode )
|
||||
continue;
|
||||
break;
|
||||
case 0x2e: // SWR
|
||||
if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 )
|
||||
continue;
|
||||
break;
|
||||
|
||||
case 0x1a: // LDL
|
||||
if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode )
|
||||
continue;
|
||||
break;
|
||||
case 0x1b: // LWR
|
||||
if( (nextcode>>26) != 0x1aa || ((s16)nextcode) != (s16)curcode+7 )
|
||||
continue;
|
||||
break;
|
||||
|
||||
case 0x2c: // SWL
|
||||
if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode )
|
||||
continue;
|
||||
break;
|
||||
case 0x2d: // SWR
|
||||
if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 )
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
|
||||
// good enough
|
||||
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
|
||||
g_pCurInstInfo[0].numpeeps = 1;
|
||||
g_pCurInstInfo[1].info |= EEINSTINFO_NOREC;
|
||||
g_pCurInstInfo++;
|
||||
i += 4;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // end peephole
|
||||
}
|
||||
// This *is* important because g_pCurInstInfo is checked a bit later on and
|
||||
// if it's not equal to s_pInstCache it handles recompilation differently.
|
||||
|
@ -1611,7 +1525,6 @@ StartRecomp:
|
|||
iDumpBlock(startpc, recPtr);
|
||||
#endif
|
||||
|
||||
static u16 manual_page[Ps2MemSize::Base >> 12];
|
||||
u32 sz=(s_nEndBlock-startpc)>>2;
|
||||
|
||||
u32 inpage_ptr=HWADDR(startpc);
|
||||
|
@ -1631,31 +1544,76 @@ StartRecomp:
|
|||
}
|
||||
else
|
||||
{
|
||||
// import the vtlbdata (alloc_bits and alloc_base and stuff):
|
||||
using namespace vtlb_private;
|
||||
|
||||
MOV32ItoR(ECX, inpage_ptr);
|
||||
MOV32ItoR(EDX, pgsz);
|
||||
|
||||
u32 mask=0;
|
||||
u32 writen=0;
|
||||
u32 writen_start=0;
|
||||
|
||||
u32 lpc=inpage_ptr;
|
||||
u32 stg=pgsz;
|
||||
|
||||
while(stg>0)
|
||||
{
|
||||
// was dyna_block_discard_recmem. See note in recResetEE for details.
|
||||
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc));
|
||||
JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 ));
|
||||
u32 bit = (lpc>>4) & 7;
|
||||
if (mask==0)
|
||||
{
|
||||
//writen=bit;
|
||||
writen_start=(((u8*)PSM(lpc)-vtlbdata.alloc_base)>>4)/8;
|
||||
}
|
||||
mask |= 1 << bit;
|
||||
|
||||
stg-=4;
|
||||
lpc+=4;
|
||||
if (bit==31)
|
||||
{
|
||||
vtlbdata.alloc_bits[writen_start]&=~mask;
|
||||
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
|
||||
xJNZ( dyna_block_discard );
|
||||
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
|
||||
mask = 0;
|
||||
}
|
||||
|
||||
//writen++;
|
||||
|
||||
if (stg<=16)
|
||||
{
|
||||
lpc += stg;
|
||||
stg = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
lpc += 16;
|
||||
stg -= 16;
|
||||
}
|
||||
}
|
||||
if (startpc != 0x81fc0) {
|
||||
|
||||
if (mask)
|
||||
{
|
||||
vtlbdata.alloc_bits[writen_start] &= ~mask;
|
||||
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
|
||||
xJNZ( dyna_block_discard );
|
||||
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
|
||||
mask = 0;
|
||||
}
|
||||
|
||||
if( startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 4 )
|
||||
{
|
||||
// Commented out until we replace it with a smarter algo that only
|
||||
// recompiles blocks a limited number of times.
|
||||
|
||||
xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1);
|
||||
xJC( dyna_block_reset );
|
||||
xJC( dyna_page_reset );
|
||||
}
|
||||
|
||||
DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
|
||||
startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
|
||||
}
|
||||
}
|
||||
inpage_ptr+=pgsz;
|
||||
inpage_sz-=pgsz;
|
||||
inpage_ptr += pgsz;
|
||||
inpage_sz -= pgsz;
|
||||
}
|
||||
|
||||
// finally recompile //
|
||||
|
|
|
@ -23,31 +23,88 @@
|
|||
|
||||
#include "iCore.h"
|
||||
#include "iR5900.h"
|
||||
#include "ix86\ix86_internal.h"
|
||||
|
||||
using namespace vtlb_private;
|
||||
using namespace x86Emitter;
|
||||
|
||||
// NOTICE: This function *destroys* EAX!!
|
||||
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
|
||||
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
|
||||
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// iAllocRegSSE -- allocates an xmm register. If no xmm register is available, xmm0 is
|
||||
// saved into g_globalXMMData and returned as a free register.
|
||||
//
|
||||
class iAllocRegSSE
|
||||
{
|
||||
// (this is one of my test cases for the new emitter --air)
|
||||
protected:
|
||||
xRegisterSSE m_reg;
|
||||
bool m_free;
|
||||
|
||||
xAddressReg src( srcRm );
|
||||
xAddressReg dest( destRm );
|
||||
public:
|
||||
iAllocRegSSE() :
|
||||
m_reg( xmm0 ),
|
||||
m_free( !!_hasFreeXMMreg() )
|
||||
{
|
||||
if( m_free )
|
||||
m_reg = xRegisterSSE( _allocTempXMMreg( XMMT_INT, -1 ) );
|
||||
else
|
||||
xStoreReg( m_reg );
|
||||
}
|
||||
|
||||
xMOV( eax, ptr[src] );
|
||||
xMOV( ptr[dest], eax );
|
||||
~iAllocRegSSE()
|
||||
{
|
||||
if( m_free )
|
||||
_freeXMMreg( m_reg.Id );
|
||||
else
|
||||
xRestoreReg( m_reg );
|
||||
}
|
||||
|
||||
operator xRegisterSSE() const { return m_reg; }
|
||||
};
|
||||
|
||||
xMOV( eax, ptr[src+4] );
|
||||
xMOV( ptr[dest+4], eax );
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Moves 128 bits from point B to point A, using SSE's MOVAPS (or MOVDQA).
|
||||
// This instruction always uses an SSE register, even if all registers are allocated! It
|
||||
// saves an SSE register to memory first, performs the copy, and restores the register.
|
||||
//
|
||||
void iMOV128_SSE( const ModSibBase& destRm, const ModSibBase& srcRm )
|
||||
{
|
||||
iAllocRegSSE reg;
|
||||
xMOVDQA( reg, srcRm );
|
||||
xMOVDQA( destRm, reg );
|
||||
}
|
||||
|
||||
xMOV( eax, ptr[src+8] );
|
||||
xMOV( ptr[dest+8], eax );
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Moves 64 bits of data from point B to point A, using either MMX, SSE, or x86 registers
|
||||
// if neither MMX nor SSE is available to the task.
|
||||
//
|
||||
// Optimizations: This method uses MMX if the cpu is in MMX mode, or SSE if it's in FPU
|
||||
// mode (saving on potential EMMS uses).
|
||||
//
|
||||
void iMOV64_Smart( const ModSibBase& destRm, const ModSibBase& srcRm )
|
||||
{
|
||||
if( (x86FpuState == FPU_STATE) && _hasFreeXMMreg() )
|
||||
{
|
||||
// Move things using MOVLPS:
|
||||
xRegisterSSE reg( _allocTempXMMreg( XMMT_INT, -1 ) );
|
||||
xMOVL.PS( reg, srcRm );
|
||||
xMOVL.PS( destRm, reg );
|
||||
_freeXMMreg( reg.Id );
|
||||
return;
|
||||
}
|
||||
|
||||
xMOV( eax, ptr[src+12] );
|
||||
xMOV( ptr[dest+12], eax );
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
xRegisterMMX reg( _allocMMXreg(-1, MMX_TEMP, 0) );
|
||||
xMOVQ( reg, srcRm );
|
||||
xMOVQ( destRm, reg );
|
||||
_freeMMXreg( reg.Id );
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV( eax, srcRm );
|
||||
xMOV( destRm, eax );
|
||||
xMOV( eax, srcRm+4 );
|
||||
xMOV( destRm+4, eax );
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -127,38 +184,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoR(freereg,ECX);
|
||||
MOVQRtoRm(EDX,freereg);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoR(EAX,ECX,4);
|
||||
MOV32RtoRm(EDX,EAX,4);
|
||||
}
|
||||
iMOV64_Smart(ptr[edx],ptr[ecx]);
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoR(freereg,ECX);
|
||||
SSE2_MOVDQARtoRm(EDX,freereg);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
||||
}
|
||||
iMOV128_SSE(ptr[edx],ptr[ecx]);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
|
@ -262,39 +292,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
|||
switch( bits )
|
||||
{
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQMtoR(freereg,ppf);
|
||||
MOVQRtoRm(EDX,freereg);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32MtoR(EAX,ppf);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32MtoR(EAX,ppf+4);
|
||||
MOV32RtoRm(EDX,EAX,4);
|
||||
}
|
||||
iMOV64_Smart(ptr[edx],ptr[ppf]);
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
|
||||
SSE2_MOVDQARtoRm(EDX,freereg);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32ItoR( ECX, ppf );
|
||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
||||
}
|
||||
iMOV128_SSE(ptr[edx],ptr[ppf]);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
|
@ -415,40 +417,21 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoR(freereg,EDX);
|
||||
MOVQRtoRm(ECX,freereg);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
|
||||
MOV32RmtoR(EAX,EDX,4);
|
||||
MOV32RtoRm(ECX,EAX,4);
|
||||
}
|
||||
iMOV64_Smart(ptr[ecx],ptr[edx]);
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoR(freereg,EDX);
|
||||
SSE2_MOVDQARtoRm(ECX,freereg);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
||||
}
|
||||
iMOV128_SSE(ptr[ecx],ptr[edx]);
|
||||
break;
|
||||
}
|
||||
|
||||
xSHR( ecx, 4 );
|
||||
|
||||
uptr alloc_base = (uptr)vtlbdata.alloc_base;
|
||||
u8* bits_base = vtlbdata.alloc_bits;
|
||||
bits_base -= (alloc_base>>4)/8; //in bytes
|
||||
|
||||
xBTS( ecx, bits_base );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -514,39 +497,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoR(freereg,EDX);
|
||||
MOVQRtoM(ppf,freereg);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoM(ppf,EAX);
|
||||
|
||||
MOV32RmtoR(EAX,EDX,4);
|
||||
MOV32RtoM(ppf+4,EAX);
|
||||
}
|
||||
iMOV64_Smart( ptr[ppf], ptr[edx] );
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoR(freereg,EDX);
|
||||
SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32ItoR( ECX, ppf );
|
||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
||||
}
|
||||
iMOV128_SSE( ptr[ppf], ptr[edx] );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -571,3 +526,4 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
|||
CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,9 @@
|
|||
|
||||
namespace x86Emitter
|
||||
{
|
||||
extern void xStoreReg( const xRegisterSSE& src );
|
||||
extern void xRestoreReg( const xRegisterSSE& dest );
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Group 1 Instruction Class
|
||||
|
||||
|
|
|
@ -677,8 +677,6 @@ extern void CDQE( void );
|
|||
extern void LAHF();
|
||||
extern void SAHF();
|
||||
|
||||
extern void BT32ItoR( x86IntRegType to, u8 from );
|
||||
extern void BTR32ItoR( x86IntRegType to, u8 from );
|
||||
extern void BSRRtoR(x86IntRegType to, x86IntRegType from);
|
||||
extern void BSWAP32R( x86IntRegType to );
|
||||
|
||||
|
|
|
@ -30,9 +30,22 @@ u8 g_globalXMMSaved = 0;
|
|||
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
|
||||
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
void xStoreReg( const xRegisterSSE& src )
|
||||
{
|
||||
xMOVDQA( &g_globalXMMData[src.Id], src );
|
||||
}
|
||||
|
||||
void xRestoreReg( const xRegisterSSE& dest )
|
||||
{
|
||||
xMOVDQA( dest, &g_globalXMMData[dest.Id] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// SetCPUState -- for assugnment of SSE roundmodes and clampmodes.
|
||||
// SetCPUState -- for assignment of SSE roundmodes and clampmodes.
|
||||
|
||||
u32 g_sseMXCSR = DEFAULT_sseMXCSR;
|
||||
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;
|
||||
|
|
Loading…
Reference in New Issue