From 3c51d6655af5796a7918f5b596dd6c2575232c72 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 23 May 2011 22:19:49 +0000 Subject: [PATCH] EERec: Implemented some common cases of Load/Stores with shifts. Adds a bit of speed here and there. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4667 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 632 ++++++++++++++++++++++---- 2 files changed, 535 insertions(+), 101 deletions(-) diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index cc8e509c38..82c9b09887 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1238,7 +1238,7 @@ void recompileNextInstruction(int delayslot) case 1: switch(_Rt_) { case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13: - Console.Warning("branch %x in delay slot!", cpuRegs.code); + Console.Warning("EE branch %x in delay slot!", cpuRegs.code); _clearNeededX86regs(); _clearNeededMMXregs(); _clearNeededXMMregs(); @@ -1247,7 +1247,7 @@ void recompileNextInstruction(int delayslot) break; case 2: case 3: case 4: case 5: case 6: case 7: case 0x14: case 0x15: case 0x16: case 0x17: - Console.Warning("branch %x in delay slot!", cpuRegs.code); + Console.Warning("EE branch %x in delay slot!", cpuRegs.code); _clearNeededX86regs(); _clearNeededMMXregs(); _clearNeededXMMregs(); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 3339223a09..86423646a1 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -107,24 +107,23 @@ void recLoad64( u32 bits, bool sign ) // Load EDX with the destination. // 64/128 bit modes load the result directly into the cpuRegs.GPR struct. - - if( _Rt_ ) - MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - else - MOV32ItoR(EDX, (uptr)&dummyValue[0] ); + + if( GPR_IS_CONST1( _Rs_ ) ) { - iFlushCall(FLUSH_EXCEPTION); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); + //iFlushCall(FLUSH_EXCEPTION); + + //_deleteEEreg(_Rt_, 0); u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; if( bits == 128 ) srcadr &= ~0x0f; + _eeOnLoadWrite(_Rt_); + MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); vtlb_DynGenRead64_Const( bits, srcadr ); } else { - iFlushCall(FLUSH_EXCEPTION); + //iFlushCall(FLUSH_EXCEPTION); // Load ECX with the source memory address that we're reading from. _eeMoveGPRtoR(ECX, _Rs_); if ( _Imm_ != 0 ) @@ -133,7 +132,7 @@ void recLoad64( u32 bits, bool sign ) AND32ItoR(ECX,~0x0F); // emitter automatically encodes this as an 8-bit sign-extended imm8 _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); + MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); vtlb_DynGenRead64(bits); } @@ -149,40 +148,40 @@ void recLoad32( u32 bits, bool sign ) //write8(0xCC); // 8/16/32 bit modes return the loaded value in EAX. - + if( GPR_IS_CONST1( _Rs_ ) ) { iFlushCall(FLUSH_EXCEPTION); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); + + //_deleteEEreg(_Rt_, 0); u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; vtlb_DynGenRead32_Const( bits, sign, srcadr ); } else { - iFlushCall(FLUSH_EXCEPTION); + //iFlushCall(FLUSH_EXCEPTION); // Load ECX with the source memory address that we're reading from. _eeMoveGPRtoR(ECX, _Rs_); if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); vtlb_DynGenRead32(bits, sign); } - if( _Rt_ ) - { + // EAX holds the loaded value, so sign extend as needed: - if (sign) - CDQ(); - else - XOR32RtoR(EDX,EDX); + if (sign) + CDQ(); + else + XOR32RtoR(EDX,EDX); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - } + _eeOnLoadWrite(_Rt_); + _deleteEEreg(_Rt_, 0); + + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); + } ////////////////////////////////////////////////////////////////////////////////////////// @@ -208,6 +207,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) else if (sz==128 || sz==64) { _deleteEEreg(_Rt_, 1); // flush register to mem + _eeOnLoadWrite(_Rt_); MOV32ItoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); } } @@ -217,14 +217,14 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) if( GPR_IS_CONST1( _Rs_ ) ) { - iFlushCall(FLUSH_EXCEPTION); + //iFlushCall(FLUSH_EXCEPTION); u32 dstadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; if( sz == 128 ) dstadr &= ~0x0f; vtlb_DynGenWrite_Const( sz, dstadr ); } else { - iFlushCall(FLUSH_EXCEPTION); + //iFlushCall(FLUSH_EXCEPTION); _eeMoveGPRtoR(ECX, _Rs_); if ( _Imm_ != 0 ) @@ -238,14 +238,14 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) ////////////////////////////////////////////////////////////////////////////////////////// // -void recLB( void ) { recLoad32(8,true); } -void recLBU( void ) { recLoad32(8,false); } -void recLH( void ) { recLoad32(16,true); } -void recLHU( void ) { recLoad32(16,false); } -void recLW( void ) { recLoad32(32,true); } -void recLWU( void ) { recLoad32(32,false); } -void recLD( void ) { recLoad64(64,false); } -void recLQ( void ) { recLoad64(128,false); } +void recLB( void ) { if(_Rt_) recLoad32(8,true); } +void recLBU( void ) { if(_Rt_) recLoad32(8,false); } +void recLH( void ) { if(_Rt_) recLoad32(16,true); } +void recLHU( void ) { if(_Rt_) recLoad32(16,false); } +void recLW( void ) { if(_Rt_) recLoad32(32,true); } +void recLWU( void ) { if(_Rt_) recLoad32(32,false); } +void recLD( void ) { if(_Rt_) recLoad64(64,false); } +void recLQ( void ) { if(_Rt_) recLoad64(128,false); } void recSB( void ) { recStore(8); } void recSH( void ) { recStore(16); } @@ -258,25 +258,163 @@ void recSD( void ) { recStore(64); } // (LWL/SWL, LWR/SWR, etc) //////////////////////////////////////////////////// + +static const s32 LWL_MASK[4] = { 0xffffff, 0x0000ffff, 0x000000ff, 0x00000000 }; +static const s32 LWR_MASK[4] = { 0x000000, 0xff000000, 0xffff0000, 0xffffff00 }; +static const u8 LWL_SHIFT[4] = { 24, 16, 8, 0 }; +static const u8 LWR_SHIFT[4] = { 0, 8, 16, 24 }; + void recLWL( void ) { + if(!_Rt_) return; + + if( GPR_IS_CONST1( _Rs_ ) ) + { + //iFlushCall(FLUSH_EXCEPTION); + u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = srcadr & 0x3; + srcadr &= ~0x3; + + vtlb_DynGenRead32_Const( 32, true, srcadr ); + + if( GPR_IS_CONST1( _Rt_ ) ) + { + int res = g_cpuConstRegs[_Rt_].UL[0] & LWL_MASK[shift]; + MOV32ItoR( EDX, res ); + } + else + { + _eeMoveGPRtoR(EDX, _Rt_); + AND32ItoR( EDX, LWL_MASK[shift] ); + } + + SHL32ItoR( EAX, LWL_SHIFT[shift] ); + OR32RtoR( EAX, EDX ); + } + else + { + + //iFlushCall(FLUSH_EXCEPTION); + // Load ECX with the source memory address that we're reading from. + _eeMoveGPRtoR(ECX, _Rs_); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + + MOV32RtoR(EDX, ECX); + AND32ItoR(EDX, 0x3); + SHL32ItoR(EDX, 3); + + AND32ItoR(ECX,~0x3); + + + vtlb_DynGenRead32(32, true); + + MOV32RtoR(ECX, EDX); + MOV32ItoR( EBX, 0xffffff ); + SHR32CLtoR( EBX ); + + MOV32ItoR( ECX, 24 ); + SUB32RtoR(ECX, EDX); + SHL32CLtoR( EAX );// eax << ecx + + _eeMoveGPRtoR(EDX, _Rt_); + + AND32RtoR( EDX, EBX ); + + OR32RtoR( EAX, EDX ); + } + + _eeOnLoadWrite(_Rt_); + _deleteEEreg(_Rt_, 0); + // EAX holds the loaded value, so sign extend as needed: + CDQ(); + + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); + + /* iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - recCall(LWL); + recCall(LWL);*/ } //////////////////////////////////////////////////// void recLWR( void ) { - iFlushCall(FLUSH_EXCEPTION); + if(!_Rt_) return; + + if( GPR_IS_CONST1( _Rs_ ) ) + { + u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = srcadr & 0x3; + srcadr &= ~0x3; + + vtlb_DynGenRead32_Const( 32, true, srcadr ); + + if( GPR_IS_CONST1( _Rt_ ) ) + { + int res = g_cpuConstRegs[_Rt_].UL[0] & LWR_MASK[shift]; + MOV32ItoR( EDX, res ); + } + else + { + _eeMoveGPRtoR(EDX, _Rt_); + AND32ItoR( EDX, LWR_MASK[shift] ); + } + + SHR32ItoR( EAX, LWR_SHIFT[shift] ); + OR32RtoR( EAX, EDX ); + } + else + { + + //iFlushCall(FLUSH_EXCEPTION); + // Load ECX with the source memory address that we're reading from. + _eeMoveGPRtoR(ECX, _Rs_); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + + MOV32RtoR(EDX, ECX); + AND32ItoR(EDX, 0x3); + SHL32ItoR(EDX, 3); + + AND32ItoR(ECX,~0x3); + + vtlb_DynGenRead32(32, true); + MOV32RtoR(ECX, EDX); + MOV32ItoR( EBX, 0xffffff00 ); + SHR32CLtoR( EAX ); // eax << ecx + + MOV32ItoR( ECX, 24 ); + SUB32RtoR(ECX, EDX); + SHL32CLtoR( EBX );// ebx << ecx + + _eeMoveGPRtoR(EDX, _Rt_); + + AND32RtoR( EDX, EBX ); + + OR32RtoR( EAX, EDX ); + } + + //Now we've used the const reg we can delete it. + _eeOnLoadWrite(_Rt_); + _deleteEEreg(_Rt_, 0); + // EAX holds the loaded value, so sign extend as needed: + CDQ(); + + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); + + + /*iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - recCall(LWR); + recCall(LWR);*/ } static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0x00000000 }; @@ -288,87 +426,346 @@ static const u8 SWL_SHIFT[4] = { 24, 16, 8, 0 }; //////////////////////////////////////////////////// void recSWL( void ) { + /*iFlushCall(FLUSH_EXCEPTION); + _deleteEEreg(_Rs_, 1); + _deleteEEreg(_Rt_, 1); + recCall(SWL);*/ // Perform a translated memory read, followed by a translated memory write // of the "merged" result. - + // NOTE: Code incomplete. I'll fix/finish it soon. --air - if( 0 ) //GPR_IS_CONST1( _Rs_ ) ) + if( GPR_IS_CONST1( _Rs_ ) ) { - iFlushCall(FLUSH_EXCEPTION); - _eeOnLoadWrite(_Rt_); - //_deleteEEreg(_Rt_, 0); - u32 addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; u32 shift = addr & 3; - vtlb_DynGenRead32_Const( 32, false, addr & 3 ); + vtlb_DynGenRead32_Const( 32, false, addr & ~3 ); // Prep eax/edx for producing the writeback result: - // equiv to: (cpuRegs.GPR.r[_Rt_].UL[0] >> SWL_SHIFT[shift]) | (mem & SWL_MASK[shift]) + if(_Rt_) + { + if( GPR_IS_CONST1( _Rt_ ) ) + { + int res; + res = g_cpuConstRegs[_Rt_].UL[0] >> SWL_SHIFT[shift]; + } + else + { + _eeMoveGPRtoR(EDX, _Rt_); + SHR32ItoR( EDX, SWL_SHIFT[shift] ); + } + } + else XOR32RtoR( EDX, EDX ); - //_deleteEEreg(_Rt_, 1); - //MOV32MtoR( EDX, (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - - _eeMoveGPRtoR(EDX, _Rt_); - AND32ItoR( EAX, SWL_MASK[shift] ); - SHR32ItoR( EDX, SWL_SHIFT[shift] ); + AND32ItoR( EAX, SWL_MASK[shift] ); OR32RtoR( EDX, EAX ); - recStore( 32, true ); + vtlb_DynGenWrite_Const( 32, addr & ~0x3 ); } else { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - recCall(SWL); + + //iFlushCall(FLUSH_EXCEPTION); + _eeMoveGPRtoR(ECX, _Rs_); + + if ( _Imm_ != 0 ) + ADD32ItoR(ECX, _Imm_); + + + MOV32RtoR(EDX, ECX); + AND32ItoR(EDX, 0x3); + SHL32ItoR(EDX, 3); + + AND32ItoR(ECX,~0x3); + + vtlb_DynGenRead32(32, false); + MOV32RtoR( ECX, EDX ); + MOV32ItoR( EBX, 0xffffff00 ); + SHL32CLtoR( EBX ); // ebx << ecx + AND32RtoR( EAX, EBX ); + if(_Rt_) + { + MOV32ItoR( ECX, 24 ); + SUB32RtoR( ECX, EDX ); + + _eeMoveGPRtoR(EDX, _Rt_); + SHR32CLtoR( EDX ); // edx >> ecx + } + else XOR32RtoR( EDX, EDX ); + OR32RtoR( EDX, EAX ); + + _eeMoveGPRtoR(ECX, _Rs_); + + if ( _Imm_ != 0 ) + ADD32ItoR(ECX, _Imm_); + + AND32ItoR(ECX,~0x3); + + vtlb_DynGenWrite(32); } } //////////////////////////////////////////////////// void recSWR( void ) { - iFlushCall(FLUSH_EXCEPTION); + /*iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); - recCall(SWR); + recCall(SWR);*/ + + + if( GPR_IS_CONST1( _Rs_ ) ) + { + //iFlushCall(FLUSH_EXCEPTION); + + + u32 addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = addr & 3; + vtlb_DynGenRead32_Const( 32, false, addr & ~3 ); + + // Prep eax/edx for producing the writeback result: + + if(_Rt_) + { + if( GPR_IS_CONST1( _Rt_ ) ) + { + int res; + res = g_cpuConstRegs[_Rt_].UL[0] << SWR_SHIFT[shift]; + MOV32ItoR( EDX, res ); + } + else + { + _eeMoveGPRtoR(EDX, _Rt_); + SHL32ItoR( EDX, SWR_SHIFT[shift] ); + } + } + else XOR32RtoR( EDX, EDX ); + AND32ItoR( EAX, SWR_MASK[shift] ); + OR32RtoR( EDX, EAX ); + + //iFlushCall(FLUSH_EXCEPTION); + vtlb_DynGenWrite_Const( 32, addr & ~0x3 ); + } + else + { + //iFlushCall(FLUSH_EXCEPTION); + _eeMoveGPRtoR(ECX, _Rs_); + + if ( _Imm_ != 0 ) + ADD32ItoR(ECX, _Imm_); + MOV32RtoR(EDX, ECX); //Move to EBX for shift + AND32ItoR(ECX,~0x3); //Mask final bit of address + + vtlb_DynGenRead32(32, false); //Read in to EAX + + AND32ItoR(EDX, 0x3); //Mask shift bits + SHL32ItoR(EDX, 3); //Multiply by 8 + + if(_Rt_) + { + MOV32RtoR( ECX, EDX ); //Copy shift in to ECX + _eeMoveGPRtoR(EBX, _Rt_); //Move Rt in to EDX + SHL32CLtoR( EBX ); // Rt << shift (ecx) + } + else XOR32RtoR( EBX, EBX ); + MOV32ItoR( ECX, 24 ); //Move 24 in to ECX + SUB32RtoR( ECX, EDX ); //Take the shift from it (so if shift is 1, itll do 24 - 8 = 16) + MOV32ItoR( EDX, 0xffffff ); //Move the mask in to where the shift was + SHR32CLtoR( EDX ); // mask >> 24-shift + + AND32RtoR( EAX, EDX ); //And the Mask with the original memory in EAX + + OR32RtoR( EBX, EAX ); //Or our result of the _Rt_ shift to it + MOV32RtoR( EDX, EBX ); + _eeMoveGPRtoR(ECX, _Rs_); + + if ( _Imm_ != 0 ) + ADD32ItoR(ECX, _Imm_); + + AND32ItoR(ECX,~0x3); + + //EDX holds data to be written back + vtlb_DynGenWrite(32); //Write back to memory + } } //////////////////////////////////////////////////// void recLDL( void ) { - iFlushCall(FLUSH_EXCEPTION); + /*iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - recCall(LDL); + recCall(LDL);*/ + if(!_Rt_) return; + + //_eeOnLoadWrite(_Rt_); + + if( GPR_IS_CONST1( _Rs_ ) ) + { + u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = srcadr & 0x7; + + _deleteEEreg(_Rt_, 0); + _eeOnLoadWrite(_Rt_); + + if(shift == 7) + { + srcadr &= ~0x7; + MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + + vtlb_DynGenRead64_Const( 64, srcadr ); + } + else if(shift == 3) + { + srcadr &= ~0x7; + vtlb_DynGenRead32_Const( 32, false, srcadr ); + MOV32RtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] , EAX); + + } + else + { + //DevCon.Warning("LDL Const Interpreter Drop Back %x", shift); + recCall(LDL); + } + } + else + { + _deleteEEreg(_Rt_, 0); + _eeOnLoadWrite(_Rt_); + recCall(LDL); + } } //////////////////////////////////////////////////// void recLDR( void ) { - iFlushCall(FLUSH_EXCEPTION); + + /*iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - recCall(LDR); + recCall(LDR);*/ + if(!_Rt_) return; + + + if( GPR_IS_CONST1( _Rs_ ) ) + { + u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = srcadr & 0x7; + _deleteEEreg(_Rt_, 0); + _eeOnLoadWrite(_Rt_); + + if(shift == 0) + { + MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + + vtlb_DynGenRead64_Const( 64, srcadr ); + } + else if(shift == 4) + { + vtlb_DynGenRead32_Const( 32, false, srcadr ); + MOV32RtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] , EAX); + } + else + { + //DevCon.Warning("LDR Const Interpreter Drop Back %x", shift); + recCall(LDR); + } + } + else + { + _deleteEEreg(_Rt_, 0); + _eeOnLoadWrite(_Rt_); + recCall(LDR); + } } //////////////////////////////////////////////////// + +static const u64 SDL_MASK[8] = +{ 0xffffffffffffff00LL, 0xffffffffffff0000LL, 0xffffffffff000000LL, 0xffffffff00000000LL, + 0xffffff0000000000LL, 0xffff000000000000LL, 0xff00000000000000LL, 0x0000000000000000LL +}; +static const u64 SDR_MASK[8] = +{ 0x0000000000000000LL, 0x00000000000000ffLL, 0x000000000000ffffLL, 0x0000000000ffffffLL, + 0x00000000ffffffffLL, 0x000000ffffffffffLL, 0x0000ffffffffffffLL, 0x00ffffffffffffffLL +}; + void recSDL( void ) { - iFlushCall(FLUSH_EXCEPTION); + + /*iFlushCall(FLUSH_EXCEPTION); _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); - recCall(SDL); + recCall(SDL);*/ + + if( GPR_IS_CONST1( _Rs_ ) ) + { + u32 addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = addr & 7; + if(shift == 7) + { + if( GPR_IS_CONST1( _Rt_ ) ) MOV32ItoR(EDX, (uptr)&g_cpuConstRegs[_Rt_].UL[0]); + else MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + vtlb_DynGenWrite_Const( 64, addr & ~0x7 ); + } + else if(shift == 3) + { + if( GPR_IS_CONST1( _Rt_ ) ) MOV32ItoR(EDX, (uptr)&g_cpuConstRegs[_Rt_].UL[1]); + else MOV32MtoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ); + vtlb_DynGenWrite_Const( 32, addr & ~0x7 ); + } + else + { + //DevCon.Warning("SDL Const Interpreter Drop Back %x", shift); + iFlushCall(FLUSH_INTERPRETER); + recCall(SDL); + } + } + else + { + iFlushCall(FLUSH_INTERPRETER); + recCall(SDL); + + } } //////////////////////////////////////////////////// void recSDR( void ) { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - recCall(SDR); + if( GPR_IS_CONST1( _Rs_ ) ) + { + //iFlushCall(FLUSH_EXCEPTION); + //_eeOnLoadWrite(_Rt_); + //_deleteEEreg(_Rt_, 1); + + u32 addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = addr & 7; + + if(shift == 0) + { + if( GPR_IS_CONST1( _Rt_ ) ) MOV32ItoR(EDX, (uptr)&g_cpuConstRegs[_Rt_].UL[0]); + else MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + vtlb_DynGenWrite_Const( 64, addr ); + } + else if(shift == 4) + { + if( GPR_IS_CONST1( _Rt_ ) ) MOV32ItoR(EDX, (uptr)&g_cpuConstRegs[_Rt_].UL[0]); + else MOV32MtoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + vtlb_DynGenWrite_Const( 32, addr ); + } + else + { + //DevCon.Warning("SDR Const Interpreter Drop Back %x", shift); + iFlushCall(FLUSH_INTERPRETER); + recCall(SDR); + } + } + else + { + iFlushCall(FLUSH_INTERPRETER); + recCall(SDR); + } } ////////////////////////////////////////////////////////////////////////////////////////// @@ -380,31 +777,48 @@ void recSDR( void ) //////////////////////////////////////////////////// void recLWC1( void ) { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); + //iFlushCall(FLUSH_EXCEPTION); + //_deleteEEreg(_Rs_, 1); _deleteFPtoXMMreg(_Rt_, 2); - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - ADD32ItoR( ECX, _Imm_ ); - - vtlb_DynGenRead32(32, false); + if( GPR_IS_CONST1( _Rs_ ) ) + { + int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + vtlb_DynGenRead32_Const(32, false, addr); + } + else + { + MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + vtlb_DynGenRead32(32, false); + } + MOV32RtoM( (int)&fpuRegs.fpr[ _Rt_ ].UL, EAX ); } //////////////////////////////////////////////////// void recSWC1( void ) { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); - _deleteFPtoXMMreg(_Rt_, 0); - - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - ADD32ItoR( ECX, _Imm_ ); + //iFlushCall(FLUSH_EXCEPTION); + //_deleteEEreg(_Rs_, 1); + _deleteFPtoXMMreg(_Rt_, 1); MOV32MtoR(EDX, (int)&fpuRegs.fpr[ _Rt_ ].UL ); - vtlb_DynGenWrite(32); + + if( GPR_IS_CONST1( _Rs_ ) ) + { + int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + vtlb_DynGenWrite_Const(32, addr); + } + else + { + MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + vtlb_DynGenWrite(32); + } + } //////////////////////////////////////////////////// @@ -420,35 +834,55 @@ void recSWC1( void ) void recLQC2( void ) { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); - _deleteVFtoXMMreg(_Ft_, 0, 2); + //iFlushCall(FLUSH_EXCEPTION); - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - ADD32ItoR( ECX, _Imm_); + //_deleteEEreg(_Rs_, 1); + _deleteVFtoXMMreg(_Ft_, 0, 2); if ( _Rt_ ) MOV32ItoR(EDX, (int)&VU0.VF[_Ft_].UD[0] ); else MOV32ItoR(EDX, (int)&dummyValue[0] ); - vtlb_DynGenRead64(128); + if( GPR_IS_CONST1( _Rs_ ) ) + { + int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + + vtlb_DynGenRead64_Const(128, addr); + } + else + { + MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_); + + vtlb_DynGenRead64(128); + } } //////////////////////////////////////////////////// void recSQC2( void ) { - iFlushCall(FLUSH_EXCEPTION); - _deleteEEreg(_Rs_, 1); - _deleteVFtoXMMreg(_Ft_, 0, 0); - - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - ADD32ItoR( ECX, _Imm_ ); + //iFlushCall(FLUSH_EXCEPTION); + //_deleteEEreg(_Rs_, 1); + _deleteVFtoXMMreg(_Ft_, 0, 1); //Want to flush it but not clear it MOV32ItoR(EDX, (int)&VU0.VF[_Ft_].UD[0] ); - vtlb_DynGenWrite(128); + if( GPR_IS_CONST1( _Rs_ ) ) + { + int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + vtlb_DynGenWrite_Const(128, addr); + } + else + { + MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + + vtlb_DynGenWrite(128); + } + } #endif