Merge pull request #1012 from skidau/aram-dma-exceptions

Compile the ARAM DMA exception checks into the JIT block
This commit is contained in:
skidau 2014-09-28 14:48:38 +10:00
commit afccf2276d
12 changed files with 127 additions and 50 deletions

View File

@ -37,6 +37,7 @@
#include "Core/HW/Memmap.h"
#include "Core/HW/MMIO.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
namespace DSP
@ -157,6 +158,9 @@ static ARAMInfo g_ARAM;
static DSPState g_dspState;
static AudioDMA g_audioDMA;
static ARAM_DMA g_arDMA;
static u32 last_mmaddr;
static u32 last_aram_dma_count;
static bool instant_dma;
union ARAM_Info
{
@ -194,6 +198,9 @@ void DoState(PointerWrap &p)
p.Do(g_AR_MODE);
p.Do(g_AR_REFRESH);
p.Do(dsp_slice);
p.Do(last_mmaddr);
p.Do(last_aram_dma_count);
p.Do(instant_dma);
dsp_emulator->DoState(p);
}
@ -212,6 +219,12 @@ static void CompleteARAM(u64 userdata, int cyclesLate)
GenerateDSPInterrupt(INT_ARAM);
}
void EnableInstantDMA()
{
CoreTiming::RemoveEvent(et_CompleteARAM);
CompleteARAM(0, 0);
instant_dma = true;
}
DSPEmulator *GetDSPEmulator()
{
@ -249,6 +262,11 @@ void Init(bool hle)
g_AR_MODE = 1; // ARAM Controller has init'd
g_AR_REFRESH = 156; // 156MHz
instant_dma = false;
last_aram_dma_count = 0;
last_mmaddr = 0;
et_GenerateDSPInterrupt = CoreTiming::RegisterEvent("DSPint", GenerateDSPInterrupt);
et_CompleteARAM = CoreTiming::RegisterEvent("ARAMint", CompleteARAM);
}
@ -516,28 +534,20 @@ void UpdateAudioDMA()
static void Do_ARAM_DMA()
{
g_dspState.DSPControl.DMAState = 1;
if (g_arDMA.Cnt.count == 32)
{
// Beyond Good and Evil (GGEE41) sends count 32
// Lost Kingdoms 2 needs the exception check here in DSP HLE mode
CompleteARAM(0, 0);
CoreTiming::ForceExceptionCheck(100);
}
else
{
CoreTiming::ScheduleEvent_Threadsafe(0, et_CompleteARAM);
// Force an early exception check on large transfers. Fixes RE2 audio.
// NFS:HP2 (<= 6144)
// Viewtiful Joe (<= 6144)
// Sonic Mega Collection (> 2048)
// Paper Mario battles (> 32)
// Mario Super Baseball (> 32)
// Knockout Kings 2003 loading (> 32)
// WWE DOR (> 32)
if (g_arDMA.Cnt.count > 2048 && g_arDMA.Cnt.count <= 6144)
CoreTiming::ForceExceptionCheck(100);
}
// ARAM DMA transfer rate has been measured on real hw
int ticksToTransfer = (g_arDMA.Cnt.count / 32) * 246;
if (instant_dma)
ticksToTransfer = 0;
CoreTiming::ScheduleEvent_Threadsafe(ticksToTransfer, et_CompleteARAM);
if (instant_dma)
CoreTiming::ForceExceptionCheck(100);
last_mmaddr = g_arDMA.MMAddr;
last_aram_dma_count = g_arDMA.Cnt.count;
// Real hardware DMAs in 32byte chunks, but we can get by with 8byte chunks
if (g_arDMA.Cnt.dir)
@ -663,5 +673,14 @@ u8 *GetARAMPtr()
return g_ARAM.ptr;
}
u64 DMAInProgress()
{
if (g_dspState.DSPControl.DMAState == 1)
{
return ((u64)last_mmaddr << 32 | (last_mmaddr + last_aram_dma_count));
}
return 0;
}
} // end of namespace DSP

View File

@ -76,5 +76,7 @@ u8* GetARAMPtr();
void UpdateAudioDMA();
void UpdateDSPSlice(int cycles);
u64 DMAInProgress();
void EnableInstantDMA();
}// end of namespace DSP

View File

@ -8,8 +8,8 @@
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "VideoCommon/VideoBackendBase.h"
@ -86,18 +86,7 @@ void STACKALIGN CheckGatherPipe()
memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount);
// Profile where the FIFO writes are occurring.
if (jit && PC != 0 && (jit->js.fifoWriteAddresses.find(PC)) == (jit->js.fifoWriteAddresses.end()))
{
// Log only stores, fp stores and ps stores, filtering out other instructions arrived via optimizeGatherPipe
int type = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type;
if (type == OPTYPE_STORE || type == OPTYPE_STOREFP || (type == OPTYPE_PS && !strcmp(GetOpInfo(Memory::ReadUnchecked_U32(PC))->opname, "psq_st")))
{
jit->js.fifoWriteAddresses.insert(PC);
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
jit->GetBlockCache()->InvalidateICache(PC, 4);
}
}
JitInterface::CompileExceptionCheck(JitInterface::EXCEPTIONS_FIFO_WRITE);
}
}

View File

@ -5,6 +5,7 @@
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Core/HW/DSP.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/Interpreter/Interpreter.h"
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
@ -325,24 +326,40 @@ void Interpreter::dcbf(UGeckoInstruction _inst)
{
NPC = PC + 12;
}*/
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
}
void Interpreter::dcbi(UGeckoInstruction _inst)
{
// Removes a block from data cache. Since we don't emulate the data cache, we don't need to do anything to the data cache
// However, we invalidate the jit block cache on dcbi
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
// The following detects a situation where the game is writing to the dcache at the address being DMA'd. As we do not
// have dcache emulation, invalid data is being DMA'd causing audio glitches. The following code detects this and
// enables the DMA to complete instantly before the invalid data is written. Resident Evil 2 & 3 trigger this.
u64 dma_in_progress = DSP::DMAInProgress();
if (dma_in_progress != 0)
{
u32 start_addr = (dma_in_progress >> 32) & Memory::RAM_MASK;
u32 end_addr = (dma_in_progress & Memory::RAM_MASK) & 0xffffffff;
u32 invalidated_addr = (address & Memory::RAM_MASK) & ~0x1f;
if (invalidated_addr >= start_addr && invalidated_addr <= end_addr)
{
DSP::EnableInstantDMA();
}
}
}
void Interpreter::dcbst(UGeckoInstruction _inst)
{
// Cache line flush. Since we don't emulate the data cache, we don't need to do anything.
// Invalidate the jit block cache on dcbst in case new code has been loaded via the data cache
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
u32 address = Helper_Get_EA_X(_inst);
JitInterface::InvalidateICache(address & ~0x1f, 32);
}
void Interpreter::dcbt(UGeckoInstruction _inst)

View File

@ -79,10 +79,10 @@ static GekkoOPTemplate primarytable[] =
{54, Interpreter::stfd, {"stfd", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{55, Interpreter::stfdu, {"stfdu", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{56, Interpreter::psq_l, {"psq_l", OPTYPE_PS, FL_OUT_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{57, Interpreter::psq_lu, {"psq_lu", OPTYPE_PS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{60, Interpreter::psq_st, {"psq_st", OPTYPE_PS, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{61, Interpreter::psq_stu, {"psq_stu", OPTYPE_PS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{56, Interpreter::psq_l, {"psq_l", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{57, Interpreter::psq_lu, {"psq_lu", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{60, Interpreter::psq_st, {"psq_st", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
{61, Interpreter::psq_stu, {"psq_stu", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
//missing: 0, 5, 6, 9, 22, 30, 62, 58
{0, Interpreter::unknown_instruction, {"unknown_instruction", OPTYPE_UNKNOWN, 0, 0, 0, 0, 0}},

View File

@ -697,7 +697,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
}
// Add an external exception check if the instruction writes to the FIFO.
if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end())
if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end() ||
jit->js.dspARAMAddresses.find(ops[i].address) != jit->js.dspARAMAddresses.end())
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
FixupBranch clearInt = J_CC(CC_NZ);
@ -707,7 +708,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, true);
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end())
{
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
}
else
{
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_DSP));
}
FixupBranch noCPInt = J_CC(CC_Z, true);
gpr.Flush(FLUSH_MAINTAIN_STATE);

View File

@ -97,6 +97,7 @@ protected:
JitBlock *curBlock;
std::unordered_set<u32> fifoWriteAddresses;
std::unordered_set<u32> dspARAMAddresses;
};
PPCAnalyst::CodeBlock code_block;

View File

@ -216,7 +216,7 @@ namespace JitInterface
jit->GetBlockCache()->InvalidateICache(address, size);
}
u32 Read_Opcode_JIT(u32 _Address)
u32 ReadOpcodeJIT(u32 _Address)
{
if (bMMU && !bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1))
{
@ -237,6 +237,37 @@ namespace JitInterface
return inst;
}
void CompileExceptionCheck(int type)
{
if (!jit)
return;
std::unordered_set<u32> *exception_addresses;
switch (type)
{
case EXCEPTIONS_FIFO_WRITE:
{
exception_addresses = &jit->js.fifoWriteAddresses;
break;
}
default:
ERROR_LOG(POWERPC, "Unknown exception check type");
}
if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))
{
int optype = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type;
if (optype == OPTYPE_STORE || optype == OPTYPE_STOREFP || (optype == OPTYPE_STOREPS))
{
exception_addresses->insert(PC);
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
jit->GetBlockCache()->InvalidateICache(PC, 4);
}
}
}
void Shutdown()
{
if (jit)

View File

@ -11,6 +11,11 @@
namespace JitInterface
{
enum
{
EXCEPTIONS_FIFO_WRITE
};
void DoState(PointerWrap &p);
CPUCoreBase *InitJitCore(int core);
@ -24,7 +29,7 @@ namespace JitInterface
bool HandleFault(uintptr_t access_address, SContext* ctx);
// used by JIT to read instructions
u32 Read_Opcode_JIT(const u32 _Address);
u32 ReadOpcodeJIT(const u32 _Address);
// Clearing CodeCache
void ClearCache();
@ -33,6 +38,8 @@ namespace JitInterface
void InvalidateICache(u32 address, u32 size);
void CompileExceptionCheck(int type);
void Shutdown();
}
extern bool bMMU;

View File

@ -654,7 +654,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
for (u32 i = 0; i < blockSize; ++i)
{
UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address);
UGeckoInstruction inst = JitInterface::ReadOpcodeJIT(address);
if (inst.hex != 0)
{

View File

@ -70,6 +70,9 @@ enum
OPTYPE_STOREFP ,
OPTYPE_DOUBLEFP,
OPTYPE_SINGLEFP,
OPTYPE_LOADPS ,
OPTYPE_STOREPS ,
OPTYPE_FPU ,
OPTYPE_PS ,
OPTYPE_DCACHE ,
OPTYPE_ICACHE ,

View File

@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 34;
static const u32 STATE_VERSION = 35;
enum
{