Merge pull request #2962 from Sonicadvance1/aarch64_integer_gatherpipe

[AArch64] Implement integer gatherpipe writes.
This commit is contained in:
Ryan Houdek 2015-09-07 06:20:01 -05:00
commit a9a339a00c
5 changed files with 161 additions and 12 deletions

View File

@ -7,6 +7,7 @@
#include "Common/PerformanceCounter.h" #include "Common/PerformanceCounter.h"
#include "Core/PatchEngine.h" #include "Core/PatchEngine.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/Profiler.h" #include "Core/PowerPC/Profiler.h"
#include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/Jit.h"
@ -137,6 +138,17 @@ void JitArm64::Break(UGeckoInstruction inst)
exit(0); exit(0);
} }
// Emits the end-of-block gather pipe check: a call to
// GPFifo::FastCheckGatherPipe, generated only when gather pipe optimization
// is enabled and js.fifoBytesThisBlock is greater than zero.
void JitArm64::Cleanup()
{
	const bool needs_fifo_check = jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0;
	if (!needs_fifo_check)
		return;

	// W0/X0 is held locked while it carries the callee address.
	gpr.Lock(W0);
	MOVI2R(X0, (u64)&GPFifo::FastCheckGatherPipe);
	BLR(X0);
	gpr.Unlock(W0);
}
void JitArm64::DoDownCount() void JitArm64::DoDownCount()
{ {
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
@ -160,6 +172,7 @@ void JitArm64::DoDownCount()
// Exits // Exits
void JitArm64::WriteExit(u32 destination) void JitArm64::WriteExit(u32 destination)
{ {
Cleanup();
DoDownCount(); DoDownCount();
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
@ -188,6 +201,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest)
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
gpr.Unlock(dest); gpr.Unlock(dest);
Cleanup();
DoDownCount(); DoDownCount();
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
@ -204,6 +218,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest)
void JitArm64::WriteExceptionExit() void JitArm64::WriteExceptionExit()
{ {
Cleanup();
DoDownCount(); DoDownCount();
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
@ -224,10 +239,31 @@ void JitArm64::WriteExceptionExit()
gpr.Unlock(WA); gpr.Unlock(WA);
} }
// Emits a block exit that runs the external-exception check before returning
// to the dispatcher.
//
// dest: 32-bit register holding the value to store into both pc and npc.
//       It is unlocked in the register cache by this function, and its
//       64-bit form is then reused as scratch for the call/jump targets.
void JitArm64::WriteExternalExceptionExit(ARM64Reg dest)
{
// Store dest into both pc and npc (offsets taken relative to X29 —
// presumably the ppcState base register; confirm against the dispatcher).
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
// Released in the register cache here, but still used below as scratch.
gpr.Unlock(dest);
// Emit the pending gather pipe check (if any) and the downcount update
// before leaving the block.
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
// Call PowerPC::CheckExternalExceptions through the 64-bit view of dest.
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
BLR(EncodeRegTo64(dest));
// After the call, copy npc into pc (npc is re-read from memory, so any
// change made by the callee is picked up).
LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
// Jump (no link) to the dispatcher routine.
MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher);
BR(EncodeRegTo64(dest));
}
void JitArm64::WriteExitDestInR(ARM64Reg Reg) void JitArm64::WriteExitDestInR(ARM64Reg Reg)
{ {
STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc)); STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc));
gpr.Unlock(Reg); gpr.Unlock(Reg);
Cleanup();
DoDownCount(); DoDownCount();
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
@ -450,6 +486,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.isLastInstruction = true; js.isLastInstruction = true;
} }
// Gather pipe writes using a non-immediate address are discovered by profiling.
bool gatherPipeIntCheck = jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end();
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{ {
js.fifoBytesThisBlock -= 32; js.fifoBytesThisBlock -= 32;
@ -458,11 +497,65 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
regs_in_use[W30] = 0; regs_in_use[W30] = 0;
FixupBranch Exception = B();
SwitchToFarCode();
const u8* done_here = GetCodePtr();
FixupBranch exit = B();
SetJumpTarget(Exception);
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe); MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
BLR(X30); BLR(X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
// Inline exception check
LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(Exceptions));
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(msr));
TBZ(W30, 11, done_here);
MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, W30, X30, 0);
TST(W30, 23, 2);
B(CC_EQ, done_here);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOVI2R(W30, ops[i].address);
WriteExternalExceptionExit(W30);
SwitchToNearCode();
SetJumpTarget(exit);
gpr.Unlock(W30); gpr.Unlock(W30);
// So we don't check exceptions twice
gatherPipeIntCheck = false;
}
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (jo.optimizeGatherPipe && gatherPipeIntCheck)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
FixupBranch NoExtException = TBZ(WA, 3); // EXCEPTION_EXTERNAL_INT
FixupBranch Exception = B();
SwitchToFarCode();
const u8* done_here = GetCodePtr();
FixupBranch exit = B();
SetJumpTarget(Exception);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
TBZ(WA, 11, done_here);
MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, WA, XA, 0);
TST(WA, 23, 2);
B(CC_EQ, done_here);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOVI2R(WA, ops[i].address);
WriteExternalExceptionExit(WA);
SwitchToNearCode();
SetJumpTarget(NoExtException);
SetJumpTarget(exit);
} }
if (!ops[i].skip) if (!ops[i].skip)

View File

@ -273,6 +273,7 @@ private:
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
void DoDownCount(); void DoDownCount();
void Cleanup();
// Profiling // Profiling
void BeginTimeProfile(JitBlock* b); void BeginTimeProfile(JitBlock* b);
@ -282,6 +283,7 @@ private:
void WriteExit(u32 destination); void WriteExit(u32 destination);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest); void WriteExceptionExit(Arm64Gen::ARM64Reg dest);
void WriteExceptionExit(); void WriteExceptionExit();
void WriteExternalExceptionExit(ARM64Reg dest);
void WriteExitDestInR(Arm64Gen::ARM64Reg dest); void WriteExitDestInR(Arm64Gen::ARM64Reg dest);
FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);

View File

@ -278,18 +278,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{ {
if (!(access_address >= (uintptr_t)Memory::physical_base && access_address < (uintptr_t)Memory::physical_base + 0x100010000) && if (!IsInSpace((u8*)ctx->CTX_PC))
!(access_address >= (uintptr_t)Memory::logical_base && access_address < (uintptr_t)Memory::logical_base + 0x100010000))
{ {
ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::physical_base); ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
DoBacktrace(access_address, ctx); DoBacktrace(access_address, ctx);
return false; return false;
} }
if (!IsInSpace((u8*)ctx->CTX_PC)) if (!(access_address >= (uintptr_t)Memory::physical_base && access_address < (uintptr_t)Memory::physical_base + 0x100010000) &&
!(access_address >= (uintptr_t)Memory::logical_base && access_address < (uintptr_t)Memory::logical_base + 0x100010000))
{ {
ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC)); ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::physical_base);
DoBacktrace(access_address, ctx); DoBacktrace(access_address, ctx);
return false; return false;

View File

@ -285,10 +285,64 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (is_immediate) if (is_immediate)
mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size); mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) if (is_immediate && jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
{
ARM64Reg WA = INVALID_REG;
int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_32)
accessSize = 32;
else if (flags & BackPatchInfo::FLAG_SIZE_16)
accessSize = 16;
else
accessSize = 8;
if (accessSize != 8)
WA = gpr.GetReg();
u64 base_ptr = std::min((u64)&GPFifo::m_gatherPipeCount, (u64)&GPFifo::m_gatherPipe);
u32 count_off = (u64)&GPFifo::m_gatherPipeCount - base_ptr;
u32 pipe_off = (u64)&GPFifo::m_gatherPipe - base_ptr;
MOVI2R(X30, base_ptr);
if (pipe_off)
ADD(X1, X30, pipe_off);
LDR(INDEX_UNSIGNED, W0, X30, count_off);
if (accessSize == 32)
{
REV32(WA, RS);
if (pipe_off)
STR(WA, X1, ArithOption(X0));
else
STR(WA, X30, ArithOption(X0));
}
else if (accessSize == 16)
{
REV16(WA, RS);
if (pipe_off)
STRH(WA, X1, ArithOption(X0));
else
STRH(WA, X30, ArithOption(X0));
}
else
{
if (pipe_off)
STRB(RS, X1, ArithOption(X0));
else
STRB(RS, X30, ArithOption(X0));
}
ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, count_off);
js.fifoBytesThisBlock += accessSize >> 3;
if (accessSize != 8)
gpr.Unlock(WA);
}
else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{ {
MOVI2R(XA, imm_addr); MOVI2R(XA, imm_addr);
EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
} }
else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE)) else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE))

View File

@ -422,7 +422,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
} }
ADD(W0, W0, accessSize >> 3); ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, count_off); STR(INDEX_UNSIGNED, W0, X30, count_off);
jit->js.fifoBytesThisBlock += accessSize >> 3; js.fifoBytesThisBlock += accessSize >> 3;
if (update) if (update)
{ {