mirror of https://github.com/PCSX2/pcsx2.git
Major bugfix/oopsie from r595 - I forgot to set the freezeregs flag, so XMM regs were getting corrupted liberally.
Added proper implementations for COP0's branching functions (BC0F, BC0T, etc). git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@606 a6443dda-0b58-4228-96e9-037be469359c
parent 1deab308d1
commit 1098253df7
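The actual fix shows up in the recExecute/recExecuteBlock hunks below: the hand-written dispatch loops are now bracketed with g_EEFreezeRegs = true / false so the register allocator leaves the XMM registers alone while generated code runs. A minimal standalone sketch of that guard pattern follows; the RegFreezeGuard type and dispatchBlocks() stub are illustrative inventions (PCSX2 itself just toggles the bool directly around its __asm blocks):

// Standalone illustration only -- not PCSX2 code.
#include <cstdio>

static bool g_EEFreezeRegs = false;   // when true, the register allocator must not touch XMM regs

struct RegFreezeGuard
{
	RegFreezeGuard()  { g_EEFreezeRegs = true; }    // freeze on entry
	~RegFreezeGuard() { g_EEFreezeRegs = false; }   // unfreeze even on early return
};

static void dispatchBlocks()
{
	// Stand-in for the recompiler's dispatch loop; a real build would enter
	// generated code here, which assumes XMM registers stay untouched.
	std::printf("dispatching with g_EEFreezeRegs=%d\n", g_EEFreezeRegs);
}

int main()
{
	{
		RegFreezeGuard guard;   // mirrors the g_EEFreezeRegs = true/false pair in the diff
		dispatchBlocks();
	}
	std::printf("after dispatch, g_EEFreezeRegs=%d\n", g_EEFreezeRegs);
	return 0;
}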
@@ -34,6 +34,45 @@ namespace Interp = R5900::Interpreter::OpcodeImpl::COP0;
 
 namespace Dynarec {
 namespace R5900 {
+
+// R5900 branch helper!
+// Recompiles code for a branch test and/or skip, complete with delay slot
+// handling. Note: for "likely" branches use recDoBranchImm_Likely instead, which
+// handles delay slots differently.
+// Parameters:
+//   jmpSkip - This parameter is the result of the appropriate J32 instruction
+//   (usually JZ32 or JNZ32).
+static void recDoBranchImm( u32* jmpSkip, bool isLikely = false )
+{
+	// All R5900 branches use this format:
+	const u32 branchTo = (s32)_Imm_ * 4 + pc;
+
+	// First up is the Branch Taken Path: Save the recompiler's state, compile the
+	// DelaySlot, and issue a BranchTest insertion. The state is reloaded below for
+	// the "did not branch" path (maintains consts, register allocations, and other optimizations).
+
+	SaveBranchState();
+	recompileNextInstruction(1);
+	SetBranchImm(branchTo);
+
+	// Jump target when the branch is *not* taken; skips the branchtest code
+	// insertion above.
+	x86SetJ32(jmpSkip);
+
+	// If it's a likely branch we need to skip the delay slot here, since
+	// MIPS cancels the delay slot instruction when likely branches aren't taken.
+	if( !isLikely ) pc -= 4;	// rewind the pc so the delay slot is recompiled (non-likely branches only).
+	LoadBranchState();
+	recompileNextInstruction(1);
+
+	SetBranchImm(pc);
+}
+
+static void recDoBranchImm_Likely( u32* jmpSkip )
+{
+	recDoBranchImm( jmpSkip, true );
+}
+
 namespace OpcodeImpl {
 namespace COP0 {
 
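For readers unfamiliar with MIPS branch-likely semantics, the reason recDoBranchImm() above rewinds pc only for non-likely branches is that an ordinary branch always executes its delay slot, while a likely branch nullifies it when the branch is not taken. A toy, interpreter-style sketch of that rule; stepBranch() and executeDelaySlot() are illustrative stand-ins, not PCSX2 functions:

// Toy sketch of MIPS branch vs. branch-likely delay-slot behavior -- not PCSX2 code.
#include <cstdio>

static void executeDelaySlot() { std::printf("  delay slot executed\n"); }

// Returns the next pc, given the branch target, whether the condition held,
// and whether the branch is of the "likely" variety.
static unsigned stepBranch(unsigned pc, unsigned target, bool taken, bool likely)
{
	if (taken)
	{
		executeDelaySlot();          // the delay slot always runs on the taken path
		return target;
	}
	if (!likely)
		executeDelaySlot();          // non-likely: the delay slot runs even when not taken
	// likely + not taken: the delay slot is cancelled entirely
	return pc + 8;                   // skip past branch + delay slot
}

int main()
{
	std::printf("normal branch, not taken:\n");
	stepBranch(0x100, 0x200, false, false);
	std::printf("likely branch, not taken:\n");
	stepBranch(0x100, 0x200, false, true);
	return 0;
}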
@@ -42,14 +81,55 @@ namespace COP0 {
  *                                                       *
  *********************************************************/
 
-void recBC0F() { recBranchCall( Interp::BC0F ); }
-void recBC0T() { recBranchCall( Interp::BC0T ); }
-void recBC0FL() { recBranchCall( Interp::BC0FL ); }
-void recBC0TL() { recBranchCall( Interp::BC0TL ); }
-void recTLBR() { recBranchCall( Interp::TLBR ); }
-void recTLBWI() { recBranchCall( Interp::TLBWI ); }
-void recTLBWR() { recBranchCall( Interp::TLBWR ); }
-void recTLBP() { recBranchCall( Interp::TLBP ); }
+// Emits "setup" code for a COP0 branch test. The instruction immediately following
+// this should be a conditional Jump -- JZ or JNZ normally.
+static void _setupBranchTest()
+{
+	_eeFlushAllUnused();
+
+	// COP0 branch conditionals are based on the following equation:
+	//   (((psHu16(DMAC_STAT) & psHu16(DMAC_PCR)) & 0x3ff) == (psHu16(DMAC_PCR) & 0x3ff))
+	// BC0F checks if the statement is false, BC0T checks if the statement is true.
+
+	// Note: we only want to compare the 16-bit values of DMAC_STAT and PCR,
+	// but using 32-bit loads here is ok (and faster) because we mask off
+	// everything except the lower 10 bits anyway.
+
+	MOV32MtoR( EAX, (uptr)&psHu32(DMAC_STAT) );
+	MOV32MtoR( ECX, (uptr)&psHu32(DMAC_PCR) );
+	AND32ItoR( EAX, 0x3ff );	// masks off all but the lower 10 bits.
+	AND32ItoR( ECX, 0x3ff );
+	CMP32RtoR( EAX, ECX );
+}
+
+void recBC0F()
+{
+	_setupBranchTest();
+	recDoBranchImm(JNZ32(0));
+}
+
+void recBC0T()
+{
+	_setupBranchTest();
+	recDoBranchImm(JZ32(0));
+}
+
+void recBC0FL()
+{
+	_setupBranchTest();
+	recDoBranchImm_Likely(JNZ32(0));
+}
+
+void recBC0TL()
+{
+	_setupBranchTest();
+	recDoBranchImm_Likely(JZ32(0));
+}
+
+void recTLBR() { recCall( Interp::TLBR, -1 ); }
+void recTLBP() { recCall( Interp::TLBP, -1 ); }
+void recTLBWI() { recCall( Interp::TLBWI, -1 ); }
+void recTLBWR() { recCall( Interp::TLBWR, -1 ); }
 
 void recERET()
 {
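As a plain-C++ restatement of the condition _setupBranchTest() sets up: the comment's equation masks DMAC_STAT against DMAC_PCR over the low 10 bits and compares the result to the masked PCR value, and BC0T branches when that holds while BC0F branches when it does not. A small self-contained sketch, with made-up register values and a hypothetical cop0Condition() helper:

// Conceptual restatement of the COP0 branch condition -- not PCSX2 code.
#include <cstdio>
#include <cstdint>

static bool cop0Condition(uint32_t dmacStat, uint32_t dmacPcr)
{
	// Equation from the comment in _setupBranchTest(): only the low 10 bits matter,
	// so loading full 32-bit words and masking afterwards is equivalent to 16-bit loads.
	return ((dmacStat & dmacPcr) & 0x3ff) == (dmacPcr & 0x3ff);
}

int main()
{
	uint32_t stat = 0xDEAD0123;   // made-up register values
	uint32_t pcr  = 0xBEEF0123;
	bool cond = cop0Condition(stat, pcr);
	std::printf("condition holds: %d\n", cond);
	std::printf("BC0T would branch: %d, BC0F would branch: %d\n", cond, !cond);
	return 0;
}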
@@ -117,7 +117,7 @@ static const char *txt1 = "REG[%d] = %x_%x\n";
 static const char *txt2 = "M32 = %x\n";
 #endif
 
-static void iBranchTest(u32 newpc, u32 cpuBranch);
+static void iBranchTest(u32 newpc, bool noDispatch=false);
 
 BASEBLOCKEX* PC_GETBLOCKEX(BASEBLOCK* p)
 {
@@ -686,7 +686,7 @@ static __declspec(naked,noreturn) void DispatcherClear()
 	// calc PC_GETBLOCK
 	s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);
 
-	if( s_pDispatchBlock->startpc == cpuRegs.pc )
+	if( s_pDispatchBlock != NULL && s_pDispatchBlock->startpc == cpuRegs.pc )
 	{
 		assert( s_pDispatchBlock->pFnptr != 0 );
 
@@ -725,7 +725,7 @@ static __declspec(naked,noreturn) void DispatcherReg()
 	{
 		s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);
 
-		if( s_pDispatchBlock->startpc != cpuRegs.pc )
+		if( s_pDispatchBlock == NULL || s_pDispatchBlock->startpc != cpuRegs.pc )
 			recRecompile(cpuRegs.pc);
 
 	__asm
@@ -750,8 +750,9 @@ __forceinline void recExecute()
 	// Optimization note : Compared pushad against manually pushing the regs one-by-one.
 	// Manually pushing is faster, especially on Core2's and such. :)
 	do {
-		__asm {
+		g_EEFreezeRegs = true;
+		__asm
+		{
 			push ebx
 			push esi
 			push edi
@@ -764,12 +765,14 @@ __forceinline void recExecute()
 			pop esi
 			pop ebx
 		}
+		g_EEFreezeRegs = false;
 	}
 	while( !recEventTest() );
 }
 
 static void recExecuteBlock()
 {
+	g_EEFreezeRegs = true;
 	__asm
 	{
 		push ebx
@@ -784,6 +787,7 @@ static void recExecuteBlock()
 		pop esi
 		pop ebx
 	}
+	g_EEFreezeRegs = false;
 	recEventTest();
 }
 
@@ -958,7 +962,7 @@ void SetBranchReg( u32 reg )
 
 	iFlushCall(FLUSH_EVERYTHING);
 
-	iBranchTest(0xffffffff, 1);
+	iBranchTest(0xffffffff);
 }
 
 void SetBranchImm( u32 imm )
@@ -971,7 +975,7 @@ void SetBranchImm( u32 imm )
 	MOV32ItoM( (uptr)&cpuRegs.pc, imm );
 	iFlushCall(FLUSH_EVERYTHING);
 
-	iBranchTest(imm, imm <= pc);
+	iBranchTest(imm);
 }
 
 void SaveBranchState()
@@ -1111,7 +1115,17 @@ static u32 eeScaleBlockCycles()
 	return s_nBlockCycles >> (3+2);
 }
 
-static void iBranchTest(u32 newpc, u32 cpuBranch)
+// Generates dynarec code for Event tests followed by a block dispatch (branch).
+// Parameters:
+//   newpc - address to jump to at the end of the block. If newpc == 0xffffffff then
+//   the jump is assumed to be to a register (dynamic). For any other value the
+//   jump is assumed to be static, in which case the block will be "hardlinked" after
+//   the first time it's dispatched.
+//
+//   noDispatch - When set true, the jump to Dispatcher is not generated. Used by the recs
+//   for blocks which perform exception checks without branching (it's enabled by
+//   setting "branch = 2").
+static void iBranchTest(u32 newpc, bool noDispatch)
 {
 #ifdef _DEBUG
 	//CALLFunc((uptr)testfpu);
@@ -1121,21 +1135,35 @@ static void iBranchTest(u32 newpc, u32 cpuBranch)
 	if( bExecBIOS ) CheckForBIOSEnd();
 
 	MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
-	ADD32ItoR(EAX, eeScaleBlockCycles());
-	if( newpc != 0xffffffff )
+	if( !noDispatch && newpc != 0xffffffff )
 	{
+		// Optimization note: instruction order pairs EDX's load with EAX's load above.
+
+		// Load EDX with the address of the JS32 jump below.
+		// We do this because the Dispatcher will use this info to modify
+		// the JS instruction later on with the address of the block it's jumping
+		// to; creating a static link of blocks that doesn't require the overhead
+		// of a dispatcher.
 		MOV32ItoR(EDX, 0);
 		ptr = (u32*)(x86Ptr-4);
 	}
 
+	// Check the Event scheduler if our "cycle target" has been reached.
+	// Equiv code to:
+	//   cpuRegs.cycle += blockcycles;
+	//   if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); }
+	ADD32ItoR(EAX, eeScaleBlockCycles());
 	MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles
 	SUB32MtoR(EAX, (uptr)&g_nextBranchCycle);
 
 	if( newpc != 0xffffffff )
 	{
+		// This is the jump instruction which gets modified by Dispatcher.
 		*ptr = (u32)JS32((u32)Dispatcher - ( (u32)x86Ptr + 6 ));
 	}
-	else
+	else if( !noDispatch )
 	{
+		// This instruction is a dynamic link, so it's never modified.
 		JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 ));
 	}
 
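The EDX/ptr handling above is a placeholder-and-patch scheme: iBranchTest() emits a dummy 32-bit operand, remembers where it lives, and the Dispatcher later rewrites it so the block jumps straight to its target ("hardlinking"). A minimal sketch of that idea using an ordinary byte buffer; emitPlaceholder32() and patch32() are illustrative, not PCSX2's emitter API:

// Placeholder-and-backpatch sketch -- not PCSX2's x86 emitter.
#include <cstdio>
#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<uint8_t> codeBuffer;   // stand-in for the x86Ptr output stream

// Emits a 32-bit placeholder and returns the offset where it was written.
static size_t emitPlaceholder32()
{
	size_t at = codeBuffer.size();
	uint32_t zero = 0;                     // like MOV32ItoR(EDX, 0) in the diff
	codeBuffer.insert(codeBuffer.end(), (uint8_t*)&zero, (uint8_t*)&zero + 4);
	return at;
}

// Later, once the real target is known, overwrite the placeholder in place.
static void patch32(size_t at, uint32_t value)
{
	std::memcpy(&codeBuffer[at], &value, sizeof(value));
}

int main()
{
	size_t fixup = emitPlaceholder32();
	patch32(fixup, 0x00400abc);            // made-up block address
	uint32_t patched;
	std::memcpy(&patched, &codeBuffer[fixup], 4);
	std::printf("patched operand = 0x%08x\n", (unsigned)patched);
	return 0;
}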
@@ -1728,8 +1756,9 @@ void recRecompile( const u32 startpc )
 					goto StartRecomp;
 				}
 			}
+			// Fall through!
+			// COP0's branch opcodes line up with COP1 and COP2's
 
-			break;
 		case 17: // cp1
 		case 18: // cp2
 			if( _Rs_ == 8 ) {
@@ -2023,15 +2052,24 @@ StartRecomp:
 	if( !(pc&0x10000000) )
 		maxrecmem = std::max( (pc&~0xa0000000), maxrecmem );
 
-	if( branch == 2 ) {
-		iFlushCall(FLUSH_EVERYTHING);
-
-		iBranchTest(0xffffffff, 1);
+	if( branch == 2 )
+	{
+		// Branch type 2 - This is how I "think" this works (air):
+		// Performs a branch/event test but does not actually "break" the block.
+		// This allows exceptions to be raised, and is thus sufficient for
+		// certain types of things like SYSCALL, EI, etc. but it is not sufficient
+		// for actual branching instructions.
+
+		iFlushCall(FLUSH_EVERYTHING);
+		iBranchTest(0xffffffff, true);
 	}
-	else {
+	else
+	{
 		assert( branch != 3 );
-		if( branch ) assert( !willbranch3 );
-		else ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() );
+		if( branch )
+			assert( !willbranch3 );
+		else
+			ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() );
 
 		if( willbranch3 ) {
 			BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock);
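The cycle/event test that both iBranchTest() and the branch-type-2 path emit boils down to the pseudocode quoted earlier in the diff: add the block's cycle cost, and if the running total passes g_nextBranchCycle, process pending events. A conceptual, self-contained sketch of that loop; names like endOfBlock() and the 300-cycle interval are made up:

// Conceptual sketch of the end-of-block cycle/event test -- not PCSX2 code.
#include <cstdio>
#include <cstdint>

static uint32_t cpuCycle = 0;            // stand-in for cpuRegs.cycle
static uint32_t nextBranchCycle = 300;   // stand-in for g_nextBranchCycle

static void doEvents()
{
	std::printf("events processed at cycle %u\n", (unsigned)cpuCycle);
	nextBranchCycle += 300;              // schedule the next event check (made-up interval)
}

static void endOfBlock(uint32_t blockCycles)
{
	cpuCycle += blockCycles;             // cpuRegs.cycle += blockcycles;
	if (cpuCycle > nextBranchCycle)      // if( cpuRegs.cycle > g_nextBranchCycle )
		doEvents();                      //     { DoEvents(); }
}

int main()
{
	for (int i = 0; i < 5; ++i)
		endOfBlock(128);                 // pretend each block costs 128 cycles
	return 0;
}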
@@ -2088,7 +2126,6 @@ using namespace Dynarec::R5900;
 
 namespace R5900
 {
 
 R5900cpu recCpu = {
 	recAlloc,
 	recReset,