JIT: use far code emitter in more places

This commit is contained in:
Fiora 2014-08-31 20:08:51 -07:00
parent c5381bae66
commit 54e26f64c6
5 changed files with 53 additions and 24 deletions

View File

@ -167,11 +167,13 @@ void Jit64::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
blocks.Init();
asm_routines.Init();
// important: do this *after* generating the global asm routines, because we can't use farcode in them.
// it'll crash because the farcode functions get cleared on JIT clears.
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;

View File

@ -279,18 +279,20 @@ void Jit64::dcbz(UGeckoInstruction inst)
ADD(32, R(RSCRATCH), gpr.R(a));
AND(32, R(RSCRATCH), Imm32(~31));
TEST(32, R(RSCRATCH), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true);
FixupBranch slow = J_CC(CC_NZ, true);
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
SwitchToFarCode();
SetJumpTarget(slow);
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
FixupBranch exit = J(true);
FixupBranch exit = J();
SetJumpTarget(fast);
SwitchToNearCode();
PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);

View File

@ -271,11 +271,11 @@ void JitIL::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
blocks.Init();
asm_routines.Init();
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;

View File

@ -351,7 +351,14 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
TEST(32, addr_loc, Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true);
FixupBranch slow, exit;
slow = J_CC(CC_NZ, farcode.Enabled());
UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend);
if (farcode.Enabled())
SwitchToFarCode();
else
exit = J(true);
SetJumpTarget(slow);
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
@ -385,10 +392,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
MEMCHECK_END
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend);
if (farcode.Enabled())
{
exit = J(true);
SwitchToNearCode();
}
SetJumpTarget(exit);
}
}
@ -470,12 +478,21 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
}
#endif
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
FixupBranch slow, exit;
TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true);
slow = J_CC(CC_NZ, farcode.Enabled());
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (farcode.Enabled())
SwitchToFarCode();
else
exit = J(true);
SetJumpTarget(slow);
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
switch (accessSize)
{
@ -493,9 +510,11 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (farcode.Enabled())
{
exit = J(true);
SwitchToNearCode();
}
SetJumpTarget(exit);
}
@ -659,15 +678,17 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// to save an instruction, since diverting a few more floats to the slow path can't hurt much.
SUB(8, R(RSCRATCH), Imm8(0x6D));
CMP(8, R(RSCRATCH), Imm8(0x3));
FixupBranch x87Conversion = J_CC(CC_BE);
FixupBranch x87Conversion = J_CC(CC_BE, true);
CVTSD2SS(dst, R(src));
FixupBranch continue1 = J();
SwitchToFarCode();
SetJumpTarget(x87Conversion);
MOVSD(M(&temp64), src);
FLD(64, M(&temp64));
FSTP(32, M(&temp32));
MOVSS(dst, M(&temp32));
FixupBranch continue1 = J(true);
SwitchToNearCode();
SetJumpTarget(continue1);
// We'd normally need to MOVDDUP here to put the single in the top half of the output register too, but
@ -696,16 +717,17 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
// through the slow path (0x00800000), but the performance effects of that should be negligible.
SUB(32, R(gprsrc), Imm8(1));
TEST(32, R(gprsrc), Imm32(0x7f800000));
FixupBranch x87Conversion = J_CC(CC_Z);
FixupBranch x87Conversion = J_CC(CC_Z, true);
CVTSS2SD(dst, R(dst));
FixupBranch continue1 = J();
SwitchToFarCode();
SetJumpTarget(x87Conversion);
MOVSS(M(&temp32), dst);
FLD(32, M(&temp32));
FSTP(64, M(&temp64));
MOVSD(dst, M(&temp64));
FixupBranch continue1 = J(true);
SwitchToNearCode();
SetJumpTarget(continue1);
MOVDDUP(dst, R(dst));

View File

@ -36,9 +36,12 @@ namespace MMIO { class Mapping; }
// exception branches.
class FarCodeCache : public Gen::X64CodeBlock
{
private:
bool m_enabled = false;
public:
void Init(int size) { AllocCodeSpace(size); }
void Shutdown() { FreeCodeSpace(); }
bool Enabled() { return m_enabled; }
void Init(int size) { AllocCodeSpace(size); m_enabled = true; }
void Shutdown() { FreeCodeSpace(); m_enabled = false; }
};
// Like XCodeBlock but has some utilities for memory access.