Added an external exception check when the CPU writes to the FIFO. This gives the CPU time to service FIFO overflows. Fixes random hangs caused by FIFO overflows and desyncs, such as those in "The Last Story" and "Battalion Wars 2". Thanks to marcosvitali for the research.

Fixes issue 5209.
Fixes issue 5150.
Fixes issue 5055.
Fixes issue 4889.
Fixes issue 4061.
Fixes issue 4010.
Fixes issue 3902.
This commit is contained in:
skidau 2012-03-02 18:53:41 +11:00
parent ee09def802
commit 9e398fd418
14 changed files with 75 additions and 42 deletions

View File

@ -19,9 +19,9 @@
#include "ChunkFile.h"
#include "ProcessorInterface.h"
#include "Memmap.h"
#include "../PowerPC/PowerPC.h"
#include "VideoBackendBase.h"
#include "../PowerPC/JitCommon/JitBase.h"
#include "../PowerPC/PowerPC.h"
#include "GPFifo.h"
@ -96,6 +96,16 @@ void STACKALIGN CheckGatherPipe()
// move back the spill bytes
memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount);
// Profile where the FIFO writes are occurring.
const u32 addr = PC - 4;
if (jit && (jit->js.fifoWriteAddresses.find(addr)) == (jit->js.fifoWriteAddresses.end()))
{
jit->js.fifoWriteAddresses.insert(addr);
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
jit->GetBlockCache()->InvalidateICache(addr, 8);
}
}
}

View File

@ -367,7 +367,7 @@ void Interpreter::dcbf(UGeckoInstruction _inst)
if (jit)
{
u32 address = Helper_Get_EA_X(_inst);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32);
}
}
@ -378,7 +378,7 @@ void Interpreter::dcbi(UGeckoInstruction _inst)
if (jit)
{
u32 address = Helper_Get_EA_X(_inst);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32);
}
}

View File

@ -41,6 +41,7 @@
#include "JitAsm.h"
#include "JitRegCache.h"
#include "Jit64_Tables.h"
#include "HW/ProcessorInterface.h"
using namespace Gen;
using namespace PowerPC;
@ -569,6 +570,24 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SetJumpTarget(b1);
}
// Add an external exception check if the instruction writes to the FIFO.
if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end())
{
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = J_CC(CC_Z);
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch noCPInt = J_CC(CC_Z);
MOV(32, M(&PC), Imm32(js.compilerPC));
WriteExceptionExit();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtException);
}
Jit64Tables::CompileInstruction(ops[i]);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))

View File

@ -1260,8 +1260,8 @@ static const std::string opcodeNames[] = {
"FResult_End", "StorePaired", "StoreSingle", "StoreDouble", "StoreFReg",
"FDCmpCR", "CInt16", "CInt32", "SystemCall", "RFIExit",
"InterpreterBranch", "IdleBranch", "ShortIdleLoop",
"FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "Tramp",
"BlockStart", "BlockEnd", "Int3",
"FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "ExtExceptionCheck",
"Tramp", "BlockStart", "BlockEnd", "Int3",
};
static const unsigned alwaysUsedList[] = {
InterpreterFallback, StoreGReg, StoreCR, StoreLink, StoreCTR, StoreMSR,
@ -1269,7 +1269,7 @@ static const unsigned alwaysUsedList[] = {
Store16, Store32, StoreSingle, StoreDouble, StorePaired, StoreFReg, FDCmpCR,
BlockStart, BlockEnd, IdleBranch, BranchCond, BranchUncond, ShortIdleLoop,
SystemCall, InterpreterBranch, RFIExit, FPExceptionCheckStart,
FPExceptionCheckEnd, ISIException, Int3, Tramp, Nop
FPExceptionCheckEnd, ISIException, ExtExceptionCheck, Int3, Tramp, Nop
};
static const unsigned extra8RegList[] = {
LoadGReg, LoadCR, LoadGQR, LoadFReg, LoadFRegDENToZero,

View File

@ -165,10 +165,10 @@ enum Opcode {
ShortIdleLoop, // Idle loop seen in homebrew like wii mahjong,
// just a branch
// used for MMU, at least until someone
// used for exception checking, at least until someone
// has a better idea of integrating it
FPExceptionCheckStart, FPExceptionCheckEnd,
ISIException,
ISIException,ExtExceptionCheck,
// "Opcode" representing a register too far away to
// reference directly; this is a size optimization
Tramp,
@ -411,6 +411,9 @@ public:
InstLoc EmitISIException(InstLoc dest) {
	// Build an ISIException unary op whose single operand is `dest`
	// and hand the resulting instruction location back to the caller.
	InstLoc isiOp = EmitUOp(ISIException, dest);
	return isiOp;
}
InstLoc EmitExtExceptionCheck(InstLoc pc) {
	// Build an ExtExceptionCheck unary op whose single operand is `pc`
	// and hand the resulting instruction location back to the caller.
	InstLoc checkOp = EmitUOp(ExtExceptionCheck, pc);
	return checkOp;
}
InstLoc EmitRFIExit() {
	// RFIExit takes no operands; it is created through FoldZeroOp
	// with 0 passed as the second argument.
	InstLoc rfiOp = FoldZeroOp(RFIExit, 0);
	return rfiOp;
}

View File

@ -50,6 +50,7 @@ The register allocation is linear scan allocation.
#include "../../../../Common/Src/CPUDetect.h"
#include "MathUtil.h"
#include "../../Core.h"
#include "HW/ProcessorInterface.h"
static ThunkManager thunks;
@ -761,6 +762,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
case FPExceptionCheckStart:
case FPExceptionCheckEnd:
case ISIException:
case ExtExceptionCheck:
case Int3:
case Tramp:
// No liveness effects
@ -1920,6 +1922,21 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->WriteExceptionExit();
break;
}
// Emits a guarded exception exit for an ExtExceptionCheck IR op:
// the exit is taken only when both tested bits below are set.
case ExtExceptionCheck: {
// Immediate carried by the op's first operand; written to PC below.
// Note: the local is named InstLoc.
unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
// Skip the exit if EXCEPTION_EXTERNAL_INT is not set in ppcState.Exceptions.
Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = Jit->J_CC(CC_Z);
// Skip the exit if INT_CAUSE_CP is not set in m_InterruptCause.
Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch noCPInt = Jit->J_CC(CC_Z);
// Both bits set: store the immediate into PC, then emit the exception exit.
Jit->MOV(32, M(&PC), Imm32(InstLoc));
Jit->WriteExceptionExit();
// Both skip branches land here, after the emitted exit.
Jit->SetJumpTarget(noCPInt);
Jit->SetJumpTarget(noExtException);
break;
}
case Int3: {
Jit->INT3();
break;

View File

@ -649,6 +649,11 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
ibuild.EmitFPExceptionCheckStart(ibuild.EmitIntConst(ops[i].address));
}
if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end())
{
ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address));
}
JitILTables::CompileInstruction(ops[i]);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))

View File

@ -31,6 +31,8 @@
#include "PowerPCDisasm.h"
#include "disasm.h"
#include <set>
#define JIT_OPCODE 0
class JitBase : public CPUCoreBase, public EmuCodeBlock
@ -75,6 +77,8 @@ protected:
u8* rewriteStart;
JitBlock *curBlock;
std::set<u32> fifoWriteAddresses;
};
public:

View File

@ -390,13 +390,12 @@ bool JitBlock::ContainsAddress(u32 em_address)
}
void JitBlockCache::InvalidateICache(u32 address)
void JitBlockCache::InvalidateICache(u32 address, const u32 length)
{
address &= ~0x1f;
// destroy JIT blocks
// !! this works correctly under assumption that any two overlapping blocks end at the same address
std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it;
while (it2 != block_map.end() && it2->first.second < address + 0x20)
while (it2 != block_map.end() && it2->first.second < address + length)
{
DestroyBlock(it2->second, true);
it2++;
@ -418,17 +417,17 @@ bool JitBlock::ContainsAddress(u32 em_address)
if (address & JIT_ICACHE_VMEM_BIT)
{
u32 cacheaddr = address & JIT_ICACHE_MASK;
memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32);
memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
}
else if (address & JIT_ICACHE_EXRAM_BIT)
{
u32 cacheaddr = address & JIT_ICACHEEX_MASK;
memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32);
memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
}
else
{
u32 cacheaddr = address & JIT_ICACHE_MASK;
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32);
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
}
#endif
}

View File

@ -129,7 +129,7 @@ public:
CompiledCode GetCompiledCodeFromBlock(int block_num);
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 em_address);
void InvalidateICache(u32 address, const u32 length);
void DestroyBlock(int block_num, bool invalidate);
// Not currently used

View File

@ -110,7 +110,7 @@ namespace PowerPC
#endif
valid[set] = 0;
if (jit)
jit->GetBlockCache()->InvalidateICache(addr);
jit->GetBlockCache()->InvalidateICache(addr & ~0x1f, 32);
}
u32 InstructionCache::ReadInstruction(u32 addr)

View File

@ -60,7 +60,6 @@ volatile bool interruptSet= false;
volatile bool interruptWaiting= false;
volatile bool interruptTokenWaiting = false;
volatile bool interruptFinishWaiting = false;
volatile bool OnOverflow = false;
bool IsOnThread()
{
@ -92,7 +91,6 @@ void DoState(PointerWrap &p)
p.Do(interruptWaiting);
p.Do(interruptTokenWaiting);
p.Do(interruptFinishWaiting);
p.Do(OnOverflow);
}
inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
@ -135,7 +133,6 @@ void Init()
bProcessFifoToLoWatermark = false;
bProcessFifoAllDistance = false;
isPossibleWaitingSetDrawDone = false;
OnOverflow = false;
et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
}
@ -449,26 +446,7 @@ void STACKALIGN GatherPipeBursted()
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
if (!IsOnThread())
{
RunGpu();
}
else
{
if(fifo.CPReadWriteDistance == fifo.CPEnd - fifo.CPBase - 32)
{
if(!OnOverflow)
NOTICE_LOG(COMMANDPROCESSOR,"FIFO is almost in overflown, BreakPoint: %i", fifo.bFF_Breakpoint);
OnOverflow = true;
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance > fifo.CPEnd - fifo.CPBase - 64)
Common::YieldCPU();
}
else
{
OnOverflow = false;
}
}
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflown by GatherPipe !\nCPU thread is too fast!");

View File

@ -35,7 +35,6 @@ extern volatile bool interruptSet;
extern volatile bool interruptWaiting;
extern volatile bool interruptTokenWaiting;
extern volatile bool interruptFinishWaiting;
extern volatile bool OnOverflow;
// internal hardware addresses
enum

View File

@ -137,8 +137,7 @@ void RunGpuLoop()
CommandProcessor::SetCpStatus();
// check if we are able to run this buffer
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance && (!AtBreakpoint() || CommandProcessor::OnOverflow))
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
if (!GpuRunningState) break;