mirror of https://github.com/PCSX2/pcsx2.git
--This breaks linux.
--Basic vtlb code rewrite for full mapping using exceptions --This is buggy & leaks ram for now git-svn-id: http://pcsx2.googlecode.com/svn/branches/vtlb-exp@934 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c81e012e51
commit
6c88e99cf2
|
@ -357,13 +357,15 @@ static void psxCheckEndGate32(int i)
|
|||
_psxCheckEndGate( i );
|
||||
}
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
void psxVBlankStart()
|
||||
{
|
||||
cdvdVsync();
|
||||
psxHu32(0x1070) |= 1;
|
||||
if(psxvblankgate & (1 << 1)) psxCheckStartGate16(1);
|
||||
if(psxvblankgate & (1 << 3)) psxCheckStartGate32(3);
|
||||
if (GetAsyncKeyState('P'))
|
||||
Cpu->Reset();
|
||||
}
|
||||
|
||||
void psxVBlankEnd()
|
||||
|
|
|
@ -61,6 +61,12 @@ vtlbHandler UnmappedVirtHandler1;
|
|||
vtlbHandler UnmappedPhyHandler0;
|
||||
vtlbHandler UnmappedPhyHandler1;
|
||||
|
||||
#define VTLB_ALLOC_SIZE (0x2900000) //this is a bit more than required
|
||||
|
||||
u8* vtlb_alloc_base; //base of the memory array
|
||||
u8* vtlb_alloc_current; //current base
|
||||
u8 vtlb_alloc_bits[VTLB_ALLOC_SIZE/16/8]; //328 kb
|
||||
|
||||
|
||||
/*
|
||||
__asm
|
||||
|
@ -91,6 +97,13 @@ callfunction:
|
|||
// Interpreter Implementations of VTLB Memory Operations.
|
||||
// See recVTLB.cpp for the dynarec versions.
|
||||
|
||||
// Marks the 16-byte granule containing 'ptr' as written in vtlb_alloc_bits, which
// holds one bit per 16 bytes of the arena starting at vtlb_alloc_base.
//
// Pointers that do not fall inside [vtlb_alloc_base, vtlb_alloc_base + VTLB_ALLOC_SIZE)
// are ignored, as is any call made before the arena has been allocated; without this
// check such a pointer would index past the end of vtlb_alloc_bits.
void memwritebits(u8* ptr)
{
	if (vtlb_alloc_base == 0)
		return;

	// Unsigned subtraction: a pointer below the base wraps around to a huge offset,
	// so the single comparison below rejects both "before" and "after" the arena.
	const uptr offs = (uptr)ptr - (uptr)vtlb_alloc_base;
	if (offs >= VTLB_ALLOC_SIZE)
		return;

	const uptr granule = offs / 16;
	vtlb_alloc_bits[granule / 8] |= (u8)(1 << (granule % 8));
}
|
||||
|
||||
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline DataType __fastcall MemOp_r0(u32 addr)
|
||||
|
@ -116,7 +129,6 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
|
|||
jNO_DEFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
// Interpreted VTLB lookup for 64 and 128 bit accesses.
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
||||
|
@ -155,6 +167,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
|||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
//memwritebits((u8*)ppf);
|
||||
*reinterpret_cast<DataType*>(ppf)=data;
|
||||
}
|
||||
else
|
||||
|
@ -182,6 +195,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
|||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
//memwritebits((u8*)ppf);
|
||||
*reinterpret_cast<DataType*>(ppf)=*data;
|
||||
if (DataSize==128)
|
||||
*reinterpret_cast<DataType*>(ppf+8)=data[1];
|
||||
|
@ -552,6 +566,13 @@ void vtlb_Term()
|
|||
//nothing to do for now
|
||||
}
|
||||
|
||||
|
||||
void vtlb_alloc_mem()
|
||||
{
|
||||
u32 size=VTLB_ALLOC_SIZE;
|
||||
vtlb_alloc_base=SysMmapEx( 0, size, 0x80000000, "Vtlb");
|
||||
vtlb_alloc_current=vtlb_alloc_base;
|
||||
}
|
||||
// This function allocates memory blocks which are compatible with the Vtlb's requirements
|
||||
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
|
||||
// pointer to be cleared. Some operating systems and/or implementations of malloc do that,
|
||||
|
@ -559,6 +580,17 @@ void vtlb_Term()
|
|||
// platform.
|
||||
u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
|
||||
{
|
||||
if (!vtlb_alloc_base)
|
||||
vtlb_alloc_mem();
|
||||
|
||||
u32 realign=((uptr)vtlb_alloc_current&(align-1));
|
||||
if (realign)
|
||||
vtlb_alloc_current+=align-realign;
|
||||
|
||||
u8* rv=vtlb_alloc_current;
|
||||
vtlb_alloc_current+=size;
|
||||
return rv;
|
||||
|
||||
#ifdef __LINUX__
|
||||
return SysMmapEx( tryBaseAddress, size, 0x80000000, "Vtlb" );
|
||||
#else
|
||||
|
@ -569,6 +601,7 @@ u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
|
|||
|
||||
void vtlb_free( void* pmem, uint size )
|
||||
{
|
||||
return;//whatever
|
||||
if( pmem == NULL ) return;
|
||||
|
||||
#ifdef __LINUX__
|
||||
|
|
|
@ -202,10 +202,8 @@ void WinRun()
|
|||
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
|
||||
|
||||
|
||||
#ifndef _DEBUG
|
||||
if( Config.Profiler )
|
||||
ProfilerInit();
|
||||
#endif
|
||||
|
||||
InitCPUTicks();
|
||||
|
||||
|
@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||
SaveConfig();
|
||||
break;
|
||||
|
||||
#ifndef _DEBUG
|
||||
case ID_PROFILER:
|
||||
Config.Profiler = !Config.Profiler;
|
||||
if( Config.Profiler )
|
||||
|
@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||
}
|
||||
SaveConfig();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
|
||||
|
@ -989,9 +985,7 @@ void CreateMainMenu() {
|
|||
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
|
||||
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
|
||||
ADDSEPARATOR(0);
|
||||
#ifndef _DEBUG
|
||||
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
|
||||
#endif
|
||||
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
|
||||
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
|
||||
ADDSEPARATOR(0);
|
||||
|
|
|
@ -49,14 +49,53 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
|
|||
}
|
||||
|
||||
// get bad virtual address
|
||||
u32 offset = (u8*)ExceptionRecord.ExceptionInformation[1]-psM;
|
||||
uptr addr=ExceptionRecord.ExceptionInformation[1];
|
||||
|
||||
if (offset>=Ps2MemSize::Base)
|
||||
return EXCEPTION_CONTINUE_SEARCH;
|
||||
//this is a *hackfix* for a bug on x64 Windows kernels. They do not give the correct address
|
||||
//if the error is a misaligned access (they return 0)
|
||||
if (addr==0)
|
||||
{
|
||||
if (eps->ContextRecord->Ecx & 0x80000000)
|
||||
addr=eps->ContextRecord->Ecx;
|
||||
}
|
||||
u32 offset = addr-(uptr)psM;
|
||||
|
||||
if (addr&0x80000000)
|
||||
{
|
||||
uptr _vtlb_HandleRewrite(uptr code);
|
||||
u8* pcode=(u8*)ExceptionRecord.ExceptionAddress;
|
||||
|
||||
mmap_ClearCpuBlock( offset );
|
||||
u32 patch_point=1;
|
||||
//01 C1
|
||||
while(pcode[-patch_point]!=0x81 || pcode[-patch_point-1]!=0xC1 || pcode[-patch_point-2]!=0x01)
|
||||
{
|
||||
patch_point++;
|
||||
}
|
||||
assert(pcode[-patch_point]==0x81);
|
||||
pcode[-patch_point]=0xF;//js32, 0x81 is add32
|
||||
pcode[-patch_point+1]=0x88;
|
||||
|
||||
return EXCEPTION_CONTINUE_EXECUTION;
|
||||
//resume execution from correct point
|
||||
|
||||
eps->ContextRecord->Eax-=*(u32*)&pcode[-patch_point+2];
|
||||
|
||||
uptr codeloc=_vtlb_HandleRewrite(*(u32*)&pcode[-patch_point+2]);
|
||||
|
||||
eps->ContextRecord->Eip=codeloc;
|
||||
*(u32*)&pcode[-patch_point+2]=codeloc-(u32)&pcode[-patch_point+6];
|
||||
|
||||
SysPrintf("memop patch for full mapping @ %08X : pp %d\n",pcode,patch_point);
|
||||
return EXCEPTION_CONTINUE_EXECUTION;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (offset>=Ps2MemSize::Base)
|
||||
return EXCEPTION_CONTINUE_SEARCH;
|
||||
|
||||
mmap_ClearCpuBlock( offset );
|
||||
|
||||
return EXCEPTION_CONTINUE_EXECUTION;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,23 +24,178 @@
|
|||
#include "iCore.h"
|
||||
#include "iR5900.h"
|
||||
|
||||
using namespace vtlb_private;
|
||||
u8* execohax_pos=0;
|
||||
u8* execohax_start=0;
|
||||
u32 execohx_sz;
|
||||
|
||||
// NOTICE: This function *destroys* EAX!!
|
||||
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
|
||||
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
|
||||
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
|
||||
u8* code_pos=0;
|
||||
u8* code_start=0;
|
||||
u32 code_sz;
|
||||
|
||||
using namespace vtlb_private;
|
||||
#include <windows.h>
|
||||
|
||||
void execuCode(bool set)
|
||||
{
|
||||
MOV32RmtoR(EAX,srcRm);
|
||||
MOV32RtoRm(destRm,EAX);
|
||||
MOV32RmtoROffset(EAX,srcRm,4);
|
||||
MOV32RtoRmOffset(destRm,EAX,4);
|
||||
MOV32RmtoROffset(EAX,srcRm,8);
|
||||
MOV32RtoRmOffset(destRm,EAX,8);
|
||||
MOV32RmtoROffset(EAX,srcRm,12);
|
||||
MOV32RtoRmOffset(destRm,EAX,12);
|
||||
u32 used=code_pos-code_start;
|
||||
u32 free=2*1024*1024-used;
|
||||
|
||||
if (code_pos == 0 || free<128)
|
||||
{
|
||||
SysPrintf("Leaking 2 megabytes of ram\n");
|
||||
code_start=code_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
|
||||
code_sz+=2*1024*1024;
|
||||
int i=0;
|
||||
while(i<code_sz)
|
||||
{
|
||||
//UD2 is 0xF 0xB.Fill the stream with it so that the cpu don't try to execute past branches ..
|
||||
code_start[i]=0xF;i++;
|
||||
code_start[i]=0xB;i++;
|
||||
}
|
||||
}
|
||||
|
||||
static u8* old;
|
||||
|
||||
if (set)
|
||||
{
|
||||
old=x86SetPtr(code_pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
code_pos=x86SetPtr(old);
|
||||
u32 tt=execohx_sz-2*1024*1024+(execohax_pos-execohax_start);
|
||||
u32 tc=code_sz-free;
|
||||
SysPrintf("%d code, %d pot, %.2f%%\n",tc,tt,tc/(float)tt*100);
|
||||
}
|
||||
}
|
||||
|
||||
// Brackets the emission of one out-of-line copy of a code sequence.
//
// execohaxme(true):
//   - emits 0x81, ModRM(3,0,EAX) and a 32-bit immediate into the current stream; the
//     immediate is the address of the record about to be written at execohax_pos;
//   - redirects the emitter to execohax_pos and writes a 5-byte record header there:
//     one size byte and one 32-bit return address, both zero for now;
//   - returns 0.
// execohaxme(false):
//   - stores the emitter's position (end of the record) in execohax_pos and writes the
//     record's code length, excluding the 5-byte header, into the header's size byte;
//   - restores the emitter pointer saved by the matching execohaxme(true) call;
//   - returns what x86SetPtr hands back, which -- assuming it returns the emitter's
//     previous position -- points at the header's return-address field.
//
// On either call, a fresh 2MB executable block is allocated when no block exists yet or
// fewer than 128 bytes remain in the current one.  The previous block is not freed.
//
// NOTE(review): the length is stored in a single byte, so a record longer than 255 bytes
// would be truncated -- confirm callers never emit that much.
u32* execohaxme(bool set)
{
	const u32 bytes_used = execohax_pos-execohax_start;
	const u32 bytes_left = 2*1024*1024-bytes_used;

	if (execohax_pos == 0 || bytes_left<128)
	{
		SysPrintf("Leaking 2 megabytes of ram\n");
		execohax_start=execohax_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
		execohx_sz+=2*1024*1024;
	}

	static u8* main_stream;	// emitter pointer to return to when the record is closed
	static u8* record;		// start of the record currently being written

	if (!set)
	{
		// Close the record: note where it ends, then back-patch its size byte.
		execohax_pos=x86SetPtr(record);
		write8<_EmitterId_>(execohax_pos-record-5);
		return (u32*)x86SetPtr(main_stream);
	}

	// Open a record: reference it from the main stream first ...
	write8<_EmitterId_>( 0x81 );
	ModRM<_EmitterId_>( 3, 0, EAX );
	write32<_EmitterId_>( (uptr)execohax_pos );

	// ... then switch streams and lay down the placeholder header.
	main_stream=x86SetPtr(execohax_pos);
	record=execohax_pos;
	write8<_EmitterId_>(0);		// size, in bytes
	write32<_EmitterId_>(0);	// return address

	return 0;
}
|
||||
|
||||
// Copies the code held in one record into the execuCode() buffer and appends a jump
// back to the record's return address.
//
//   block : address of a record laid out as
//           [u8 code length][u32 return address][code bytes ...]
//   return: address at which the copied code starts.
uptr _vtlb_HandleRewrite(uptr block)
{
	const u8* const record = (const u8*)block;

	const u8  code_len  = record[0];
	const u32 return_to = *(const u32*)(record+1);
	const u8* src       = record+5;

	execuCode(true);

	// Must be read after execuCode(true), which may have switched to a new buffer.
	const uptr stub = (uptr)code_pos;

	for (u32 remaining = code_len; remaining != 0; --remaining)
	{
		write8<_EmitterId_>(*src);
		++src;
	}

	// Relative jump back to the return address recorded for this sequence.
	JMP32(return_to-(uptr)x86Ptr[_EmitterId_]-5);

	execuCode(false);

	return stub;
}
|
||||
|
||||
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
|
||||
// Emits a memory-to-memory move that goes through one XMM register.
//
//   srcAddr : when non-zero the source is that absolute address, otherwise the source
//             is addressed through srcRm with offset 0.
//   dstAddr : when non-zero the destination is that absolute address, otherwise the
//             destination is addressed through destRm with offset 0.
//   half    : true uses the MOVLPS emitters, false uses the MOVDQA emitters.
//
// A temporary XMM register is allocated when one is free.  Otherwise XMM0 is saved to
// g_globalXMMData, used for the move, and reloaded afterwards.
void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0,bool half=false )
{
	const bool have_temp = _hasFreeXMMreg();
	int xmm;

	if (have_temp)
	{
		xmm = _allocTempXMMreg( XMMT_INT, -1 );
	}
	else
	{
		// No free register: park XMM0 in scratch memory and borrow it.
		SSE2_MOVDQA_XMM_to_M128((uptr)g_globalXMMData,XMM0);
		xmm = XMM0;
	}

	// Load from the source.
	if (half)
	{
		if (srcAddr) { SSE_MOVLPS_M64_to_XMM(xmm,srcAddr); }
		else         { SSE_MOVLPS_RmOffset_to_XMM(xmm,srcRm,0); }
	}
	else
	{
		if (srcAddr) { SSE2_MOVDQA_M128_to_XMM(xmm,srcAddr); }
		else         { SSE2_MOVDQARmtoROffset(xmm,srcRm,0); }
	}

	// Store to the destination.
	if (half)
	{
		if (dstAddr) { SSE_MOVLPS_XMM_to_M64(dstAddr,xmm); }
		else         { SSE_MOVLPS_XMM_to_RmOffset(destRm,xmm,0); }
	}
	else
	{
		if (dstAddr) { SSE2_MOVDQA_XMM_to_M128(dstAddr,xmm); }
		else         { SSE2_MOVDQARtoRmOffset(destRm,xmm,0); }
	}

	// Release the temporary, or give XMM0 its original contents back.
	if (have_temp)
	{
		_freeXMMreg(xmm);
	}
	else
	{
		SSE2_MOVDQA_M128_to_XMM(XMM0,(uptr)g_globalXMMData);
	}
}
|
||||
// Emits a memory-to-memory move, through an MMX register when possible.
//
//   srcAddr : when non-zero the source is that absolute address, otherwise the source
//             is addressed through srcRm with offset 0.
//   dstAddr : when non-zero the destination is that absolute address, otherwise the
//             destination is addressed through destRm with offset 0.
//
// The work is handed to MOVx_SSE (with half == true) when an XMM register is free while
// x86FpuState == FPU_STATE, or when no MMX register is free.
void MOV64_MMX( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0)
{
	if( (_hasFreeXMMreg() && (x86FpuState == FPU_STATE)) || !_hasFreeMMXreg() )
	{
		MOVx_SSE(destRm,srcRm,srcAddr,dstAddr,true);
		return;
	}

	const int mmx = _allocMMXreg(-1, MMX_TEMP, 0);

	if (srcAddr) { MOVQMtoR(mmx,srcAddr); }
	else         { MOVQRmtoROffset(mmx,srcRm,0); }

	if (dstAddr) { MOVQRtoM(dstAddr,mmx); }
	else         { MOVQRtoRmOffset(destRm,mmx,0); }

	_freeMMXreg(mmx);
}
|
||||
/*
|
||||
// Pseudo-Code For the following Dynarec Implementations -->
|
||||
|
||||
|
@ -118,38 +273,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,ECX,0);
|
||||
MOVQRtoRmOffset(EDX,freereg,0);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
}
|
||||
MOV64_MMX(EDX,ECX);
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
|
||||
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
||||
}
|
||||
MOVx_SSE(EDX,ECX);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
|
@ -189,15 +317,16 @@ void vtlb_DynGenRead64(u32 bits)
|
|||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread = JS8(0);
|
||||
//u8* _direct = JMP8(0);
|
||||
execohaxme(true);
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, false );
|
||||
u8* cont = JMP8(0);
|
||||
|
||||
x86SetJ8(_fullread);
|
||||
_vtlb_DynGen_IndirectRead( bits );
|
||||
|
||||
x86SetJ8(cont);
|
||||
|
||||
u32* patch=execohaxme(false);
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, false );
|
||||
|
||||
*patch=(uptr)x86Ptr[_EmitterId_];
|
||||
}
|
||||
|
||||
// Recompiled input registers:
|
||||
|
@ -211,12 +340,9 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
|
|||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread = JS8(0);
|
||||
//u8* _direct = JMP8(0);
|
||||
execohaxme(true);
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, sign );
|
||||
u8* cont = JMP8(0);
|
||||
|
||||
x86SetJ8(_fullread);
|
||||
_vtlb_DynGen_IndirectRead( bits );
|
||||
|
||||
// perform sign extension on the result:
|
||||
|
@ -236,7 +362,11 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
|
|||
MOVZX32R16toR(EAX,EAX);
|
||||
}
|
||||
|
||||
x86SetJ8(cont);
|
||||
u32* patch=execohaxme(false);
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, sign );
|
||||
|
||||
*patch=(uptr)x86Ptr[_EmitterId_];
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -251,39 +381,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
|||
switch( bits )
|
||||
{
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQMtoR(freereg,ppf);
|
||||
MOVQRtoRmOffset(EDX,freereg,0);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32MtoR(EAX,ppf);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32MtoR(EAX,ppf+4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
}
|
||||
MOV64_MMX( EDX, ECX,ppf ); // dest <- src!
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
|
||||
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32ItoR( ECX, ppf );
|
||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
||||
}
|
||||
MOVx_SSE( EDX, ECX,ppf ); // dest <- src!
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
|
@ -403,40 +505,16 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,EDX,0);
|
||||
MOVQRtoRmOffset(ECX,freereg,0);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoRmOffset(ECX,EAX,4);
|
||||
}
|
||||
MOV64_MMX( ECX, EDX );
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
|
||||
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
||||
}
|
||||
MOVx_SSE( ECX, EDX );
|
||||
break;
|
||||
}
|
||||
|
||||
// SHR32ItoR(ECX,4);// do /16
|
||||
// BTS_wtf(asdasd,ECX);
|
||||
}
|
||||
|
||||
static void _vtlb_DynGen_IndirectWrite( u32 bits )
|
||||
|
@ -464,15 +542,17 @@ void vtlb_DynGenWrite(u32 sz)
|
|||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _full=JS8(0);
|
||||
|
||||
_vtlb_DynGen_DirectWrite( sz );
|
||||
u8* cont = JMP8(0);
|
||||
//u8* _direct=JMP8(0);
|
||||
|
||||
x86SetJ8(_full);
|
||||
execohaxme(true);
|
||||
|
||||
_vtlb_DynGen_IndirectWrite( sz );
|
||||
|
||||
x86SetJ8(cont);
|
||||
|
||||
u32* patch=execohaxme(false);
|
||||
_vtlb_DynGen_DirectWrite( sz );
|
||||
|
||||
*patch=(uptr)x86Ptr[_EmitterId_];
|
||||
}
|
||||
|
||||
|
||||
|
@ -499,39 +579,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
|||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,EDX,0);
|
||||
MOVQRtoM(ppf,freereg);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoM(ppf,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoM(ppf+4,EAX);
|
||||
}
|
||||
MOV64_MMX( ECX, EDX,0,ppf); // dest <- src!
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
|
||||
SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32ItoR( ECX, ppf );
|
||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
||||
}
|
||||
MOVx_SSE( ECX, EDX,0,ppf); // dest <- src!
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ emitterT void write64( u64 val ){
|
|||
//------------------------------------------------------------------
|
||||
// jump/align functions
|
||||
//------------------------------------------------------------------
|
||||
emitterT void ex86SetPtr( u8 *ptr );
|
||||
emitterT u8* ex86SetPtr( u8 *ptr );
|
||||
emitterT void ex86SetJ8( u8 *j8 );
|
||||
emitterT void ex86SetJ8A( u8 *j8 );
|
||||
emitterT void ex86SetJ16( u16 *j16 );
|
||||
|
|
|
@ -159,9 +159,12 @@ emitterT void CMOV32MtoR( int cc, int to, uptr from )
|
|||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Returns the emitter's current write pointer and, when 'ptr' is non-null, moves the
// emitter to 'ptr'.  Passing 0 therefore reads the position without changing it.
emitterT u8* ex86SetPtr( u8* ptr )
{
	u8* const previous = x86Ptr[I];

	if (ptr != 0)
	{
		x86Ptr[I] = ptr;
	}

	return previous;
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
|
Loading…
Reference in New Issue