Merge pull request #2663 from degasus/dcbx

Jit64: dcbf + dcbi
This commit is contained in:
Markus Wick 2015-08-25 12:16:56 +02:00
commit 24cb650078
7 changed files with 101 additions and 19 deletions

View File

@ -145,12 +145,12 @@ struct ARAMInfo
// STATE_TO_SAVE
static ARAMInfo g_ARAM;
static UDSPControl g_dspState;
static AudioDMA g_audioDMA;
static ARAM_DMA g_arDMA;
static u32 last_mmaddr;
static u32 last_aram_dma_count;
static bool instant_dma;
// Deliberately not `static`: the control register is exposed through an `extern`
// declaration so that code generated by the Jit64 dcbx emitter can test it directly
// in memory instead of calling into this module.
UDSPControl g_dspState;
union ARAM_Info
{
@ -216,6 +216,22 @@ void EnableInstantDMA()
instant_dma = true;
}
// Requests instant completion of the pending DMA when the cache line containing
// `address` falls inside the range reported by DSP::DMAInProgress().
//
// DMAInProgress() is treated as "nothing pending" when it returns 0; otherwise its
// upper 32 bits are decoded as the start address and its lower 32 bits as the end
// address. `address` is masked to RAM and rounded down to a 32-byte boundary before
// the (inclusive) range comparison.
void FlushInstantDMA(u32 address)
{
	const u64 pending = DSP::DMAInProgress();
	if (pending == 0)
		return;

	const u32 range_begin = (pending >> 32) & Memory::RAM_MASK;
	const u32 range_end = (pending & Memory::RAM_MASK) & 0xffffffff;
	const u32 line_addr = (address & Memory::RAM_MASK) & ~0x1f;

	// Nothing to do when the invalidated line lies outside the transfer.
	const bool outside_transfer = line_addr < range_begin || line_addr > range_end;
	if (outside_transfer)
		return;

	DSP::EnableInstantDMA();
}
DSPEmulator *GetDSPEmulator()
{
return dsp_emulator;

View File

@ -56,6 +56,8 @@ union UDSPControl
UDSPControl(u16 _Hex = 0) : Hex(_Hex) {}
};
extern UDSPControl g_dspState;
void Init(bool hle);
void Shutdown();
@ -78,5 +80,6 @@ void UpdateAudioDMA();
void UpdateDSPSlice(int cycles);
// Returns 0 when no DMA is pending. Otherwise FlushInstantDMA decodes the value as
// (start address << 32) | end address.
u64 DMAInProgress();
// Sets the instant-DMA flag.
void EnableInstantDMA();
// Calls EnableInstantDMA() if the 32-byte line containing `address` lies within the
// address range reported by DMAInProgress(); does nothing when no DMA is pending.
void FlushInstantDMA(u32 address);
}// end of namespace DSP

View File

@ -340,18 +340,7 @@ void Interpreter::dcbi(UGeckoInstruction _inst)
// Detect a game writing to the dcache at an address that is currently being DMA'd. Because dcache emulation is not
// implemented, the DMA would otherwise transfer invalid data and cause audio glitches. When this is detected, the DMA
// is made to complete instantly, before the invalid data is written. Resident Evil 2 & 3 trigger this.
u64 dma_in_progress = DSP::DMAInProgress();
if (dma_in_progress != 0)
{
u32 start_addr = (dma_in_progress >> 32) & Memory::RAM_MASK;
u32 end_addr = (dma_in_progress & Memory::RAM_MASK) & 0xffffffff;
u32 invalidated_addr = (address & Memory::RAM_MASK) & ~0x1f;
if (invalidated_addr >= start_addr && invalidated_addr <= end_addr)
{
DSP::EnableInstantDMA();
}
}
DSP::FlushInstantDMA(address);
}
void Interpreter::dcbst(UGeckoInstruction _inst)

View File

@ -255,4 +255,6 @@ public:
void lmw(UGeckoInstruction inst);
void stmw(UGeckoInstruction inst);
// Shared emitter for the data-cache block instructions dcbst, dcbf and dcbi
// (the opcode table routes sub-opcodes 54, 86 and 470 here).
void dcbx(UGeckoInstruction inst);
};

View File

@ -214,11 +214,11 @@ static GekkoOPTemplate table31[] =
{824, &Jit64::srawix}, // srawix
{24, &Jit64::slwx}, // slwx
{54, &Jit64::FallBackToInterpreter}, // dcbst
{86, &Jit64::FallBackToInterpreter}, // dcbf
{246, &Jit64::dcbt }, // dcbtst
{278, &Jit64::dcbt }, // dcbt
{470, &Jit64::FallBackToInterpreter}, // dcbi
{54, &Jit64::dcbx}, // dcbst
{86, &Jit64::dcbx}, // dcbf
{246, &Jit64::dcbt}, // dcbtst
{278, &Jit64::dcbt}, // dcbt
{470, &Jit64::dcbx}, // dcbi
{758, &Jit64::DoNothing}, // dcba
{1014, &Jit64::dcbz}, // dcbz

View File

@ -7,6 +7,8 @@
#include "Common/CommonTypes.h"
#include "Core/HW/DSP.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
@ -290,6 +292,70 @@ void Jit64::lXXx(UGeckoInstruction inst)
gpr.UnlockAllX();
}
// Emits x86-64 code for the data-cache block instructions dispatched here
// (dcbst, dcbf and dcbi).
//
// The generated code:
//   1. computes the effective address RB (+ RA when the RA field is non-zero);
//   2. looks up the bit for that address's 32-byte line in the block cache's bitset and,
//      if it is set, calls JitInterface::InvalidateICache for that line (far code);
//   3. for dcbi only (sub-opcode 470), tests bit 9 of DSP::g_dspState and, if it is set,
//      calls DSP::FlushInstantDMA with the line-aligned address (far code).
//
// The statement order matters: `addr` is progressively transformed (byte address ->
// line index -> line-aligned address) and the emitted instructions depend on that.
void Jit64::dcbx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
// addr and value live in the two scratch registers; tmp is an extra register obtained
// from the register cache and released by UnlockAllX() at the end.
X64Reg addr = RSCRATCH;
X64Reg value = RSCRATCH2;
X64Reg tmp = gpr.GetFreeXReg();
gpr.FlushLockX(tmp);
// Effective address = RB + RA, where an RA field of 0 contributes nothing.
// When both operands already sit in host registers a single LEA does the add.
if (inst.RA && gpr.R(inst.RA).IsSimpleReg() && gpr.R(inst.RB).IsSimpleReg())
{
LEA(32, addr, MRegSum(gpr.RX(inst.RA), gpr.RX(inst.RB)));
}
else
{
MOV(32, R(addr), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(addr), gpr.R(inst.RA));
}
// Check whether a JIT cache line needs to be invalidated.
// Net effect of the next two instructions: value = (addr & 0x1FFFFFFF) >> 10, i.e. the
// index of the u32 bitset word covering this address (32 lines of 32 bytes per word).
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 in 32 bits: discards the top 3 address bits
SHR(32, R(value), Imm8(3 + 5 + 5)); // undo the << 3, then >> 5 for cache line size, >> 5 for width of bitset
// Load that bitset word: value = bitset[value].
MOV(64, R(tmp), ImmPtr(jit->GetBlockCache()->GetBlockBitSet()));
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
// addr becomes the cache-line index; BT with a register operand uses it modulo 32 as
// the bit position and copies the selected bit into the carry flag.
SHR(32, R(addr), Imm8(5));
BT(32, R(value), R(addr));
// Bit set -> branch out to far code to do the invalidation, then jump back.
FixupBranch c = J_CC(CC_C, true);
SwitchToFarCode();
SetJumpTarget(c);
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
// Arguments: (line index << 5) = line-aligned address, length 32, third argument zero
// (presumably `forced = false` -- confirm against JitInterface::InvalidateICache).
MOV(32, R(ABI_PARAM1), R(addr));
SHL(32, R(ABI_PARAM1), Imm8(5));
MOV(32, R(ABI_PARAM2), Imm32(32));
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
ABI_CallFunction((void*)JitInterface::InvalidateICache);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
// dcbi
if (inst.SUBOP10 == 470)
{
// Flush DSP DMA if DMAState bit is set
// (16-bit test of bit 9 of the control register, done inline so the common case
// costs no call.)
TEST(16, M(&DSP::g_dspState), Imm16(1 << 9));
c = J_CC(CC_NZ, true);
SwitchToFarCode();
SetJumpTarget(c);
// registersInUse is the set computed for the first far-code block above.
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
// addr still holds the line index here; shifting back yields the 32-byte-aligned address.
// NOTE(review): addr is a scratch register and is read here after the possible
// InvalidateICache call above -- confirm it is preserved across that call.
SHL(32, R(addr), Imm8(5));
ABI_CallFunctionR((void*)DSP::FlushInstantDMA, addr);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
}
gpr.UnlockAllX();
}
void Jit64::dcbt(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -60,14 +60,15 @@ typedef void (*CompiledCode)();
// implementation of std::bitset is slow.
class ValidBlockBitSet final
{
public:
enum
{
VALID_BLOCK_MASK_SIZE = 0x20000000 / 32,
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
};
// Directly accessed by Jit64.
std::unique_ptr<u32[]> m_valid_block;
public:
ValidBlockBitSet()
{
m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
@ -157,6 +158,11 @@ public:
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 address, const u32 length, bool forced);
// Hands out the raw u32 word array backing the valid-block bitset so that
// JIT-generated code can index it directly.
u32* GetBlockBitSet() const
{
	u32* const bitset_words = valid_block.m_valid_block.get();
	return bitset_words;
}
};
// x86 BlockCache