From fc63c7ecaee9d6136897da45002ad32a573984b5 Mon Sep 17 00:00:00 2001 From: Fiora Date: Mon, 6 Oct 2014 14:00:19 -0700 Subject: [PATCH] JIT: genericize immediate address handling, support in float stores too --- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 2 - .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 131 ++++++------------ .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 62 ++++++--- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 39 ++---- .../Core/PowerPC/JitCommon/JitAsmCommon.h | 1 - .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 113 ++++++++++++--- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 7 +- 7 files changed, 194 insertions(+), 161 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 6ff2ca3489..ae1d149e56 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -186,8 +186,6 @@ void Jit64AsmRoutineManager::GenerateCommon() GenFifoWrite(16); fifoDirectWrite32 = AlignCode4(); GenFifoWrite(32); - fifoDirectWriteFloat = AlignCode4(); - GenFifoFloatWrite(); frsqrte = AlignCode4(); GenFrsqrte(); fres = AlignCode4(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 2a0c27b4cd..f0f058401b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -334,98 +334,54 @@ void Jit64::stX(UGeckoInstruction inst) int s = inst.RS; int a = inst.RA; - - bool update = inst.OPCD & 1; - s32 offset = (s32)(s16)inst.SIMM_16; - if (a || !update) + bool update = (inst.OPCD & 1) && offset; + FALLBACK_IF(update); + + if (!a && update) + PanicAlert("Invalid stX"); + + int accessSize; + switch (inst.OPCD & ~1) { - int accessSize; - switch (inst.OPCD & ~1) + case 36: // stw + accessSize = 32; + break; + case 44: // sth + accessSize = 16; + break; + case 38: // stb + accessSize = 8; + break; + default: + _assert_msg_(DYNA_REC, 0, "stX: Invalid access size."); + return; + } + + // If we already know the address of the write + if (!a || gpr.R(a).IsImm()) + { + u32 addr = (a ? (u32)gpr.R(a).offset : 0) + offset; + bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse()); + if (update) { - case 36: // stw - accessSize = 32; - break; - case 44: // sth - accessSize = 16; - break; - case 38: // stb - accessSize = 8; - break; - default: - _assert_msg_(DYNA_REC, 0, "stX: Invalid access size."); - return; - } - - if ((a == 0) || gpr.R(a).IsImm()) - { - // If we already know the address through constant folding, we can do some - // fun tricks... - u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset); - addr += offset; - if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) + if (!js.memcheck || !exception) { - // Helps external systems know which instruction triggered the write - MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - - MOV(32, R(RSCRATCH2), gpr.R(s)); - if (update) - gpr.SetImmediate32(a, addr); - - // No need to protect these, they don't touch any state - // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat - switch (accessSize) - { - case 8: - CALL((void *)asm_routines.fifoDirectWrite8); - break; - case 16: - CALL((void *)asm_routines.fifoDirectWrite16); - break; - case 32: - CALL((void *)asm_routines.fifoDirectWrite32); - break; - } - js.fifoBytesThisBlock += accessSize >> 3; - gpr.UnlockAllX(); - return; - } - else if (Memory::IsRAMAddress(addr)) - { - MOV(32, R(RSCRATCH), gpr.R(s)); - WriteToConstRamAddress(accessSize, RSCRATCH, addr, true); - if (update) - gpr.SetImmediate32(a, addr); - return; + gpr.SetImmediate32(a, addr); } else { - // Helps external systems know which instruction triggered the write - MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - - BitSet32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, 0); - switch (accessSize) - { - case 32: - ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); - break; - case 16: - ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); - break; - case 8: - ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); - break; - } - ABI_PopRegistersAndAdjustStack(registersInUse, 0); - if (update) - gpr.SetImmediate32(a, addr); - return; + gpr.KillImmediate(a, true, true); + MEMCHECK_START(false) + ADD(32, gpr.R(a), Imm32((u32)offset)); + MEMCHECK_END } } - + } + else + { gpr.Lock(a, s); - gpr.BindToRegister(a, true, false); + gpr.BindToRegister(a, true, update); if (gpr.R(s).IsImm()) { SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); @@ -446,21 +402,14 @@ void Jit64::stX(UGeckoInstruction inst) SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); } - if (update && offset) + if (update) { MEMCHECK_START(false) - gpr.KillImmediate(a, true, true); - ADD(32, gpr.R(a), Imm32((u32)offset)); - MEMCHECK_END } - gpr.UnlockAll(); - } - else - { - PanicAlert("Invalid stX"); } + gpr.UnlockAll(); } void Jit64::stXx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 2b158b5948..a859a53ff9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -101,11 +101,50 @@ void Jit64::stfXXX(UGeckoInstruction inst) int s = inst.RS; int a = inst.RA; int b = inst.RB; + s32 imm = (s16)inst.SIMM_16; + int accessSize = single ? 32 : 64; - FALLBACK_IF((!indexed && !a) || (update && js.memcheck && a == b)); + FALLBACK_IF(update && js.memcheck && a == b); + + if (single) + { + fpr.BindToRegister(s, true, false); + ConvertDoubleToSingle(XMM0, fpr.RX(s)); + MOVD_xmm(R(RSCRATCH), XMM0); + } + else + { + if (fpr.R(s).IsSimpleReg()) + MOVQ_xmm(R(RSCRATCH), fpr.RX(s)); + else + MOV(64, R(RSCRATCH), fpr.R(s)); + } + + if (!indexed && (!a || gpr.R(a).IsImm())) + { + u32 addr = (a ? (u32)gpr.R(a).offset : 0) + imm; + bool exception = WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse()); + + if (update) + { + if (!js.memcheck || !exception) + { + gpr.SetImmediate32(a, addr); + } + else + { + gpr.KillImmediate(a, true, true); + MEMCHECK_START(false) + ADD(32, gpr.R(a), Imm32((u32)imm)); + MEMCHECK_END + } + } + fpr.UnlockAll(); + gpr.UnlockAll(); + return; + } s32 offset = 0; - s32 imm = (s16)inst.SIMM_16; if (indexed) { if (update) @@ -140,21 +179,8 @@ void Jit64::stfXXX(UGeckoInstruction inst) MOV(32, R(RSCRATCH2), gpr.R(a)); } - if (single) - { - fpr.BindToRegister(s, true, false); - ConvertDoubleToSingle(XMM0, fpr.RX(s)); - SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse()); - fpr.UnlockAll(); - } - else - { - if (fpr.R(s).IsSimpleReg()) - MOVQ_xmm(R(RSCRATCH), fpr.RX(s)); - else - MOV(64, R(RSCRATCH), fpr.R(s)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse()); - } + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse()); + if (js.memcheck && update) { // revert the address change if an exception occurred @@ -162,6 +188,8 @@ void Jit64::stfXXX(UGeckoInstruction inst) SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm)); MEMCHECK_END } + + fpr.UnlockAll(); gpr.UnlockAll(); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index c47198865e..93115f29eb 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -22,31 +22,13 @@ static int temp32; void CommonAsmRoutines::GenFifoWrite(int size) { - // Assume value in RSCRATCH2 - PUSH(ESI); - MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - - SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2); - - ADD(32, R(ESI), Imm8(size >> 3)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(ESI); - RET(); -} - -void CommonAsmRoutines::GenFifoFloatWrite() -{ - // Assume value in XMM0 - PUSH(ESI); - MOVSS(M(&temp32), XMM0); - MOV(32, R(RSCRATCH2), M(&temp32)); - MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2); - ADD(32, R(ESI), Imm8(4)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(ESI); + // Assume value in RSCRATCH + u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe; + _assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!"); + MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount)); + SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH); + ADD(32, R(RSCRATCH2), Imm8(size >> 3)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2)); RET(); } @@ -173,8 +155,8 @@ void CommonAsmRoutines::GenFres() // Safe + Fast Quantizers, originally from JITIL by magumagu -static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; +static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 }; static const float GC_ALIGNED16(m_quantizeTableS[]) = { @@ -386,7 +368,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores() // Easy! const u8* storeSingleFloat = AlignCode4(); - SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOVD_xmm(R(RSCRATCH), XMM0); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index d13859ff1f..f38df7f266 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -13,7 +13,6 @@ public: const u8 *fifoDirectWrite8; const u8 *fifoDirectWrite16; const u8 *fifoDirectWrite32; - const u8 *fifoDirectWriteFloat; const u8 *enterCode; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 68af8de6f3..6066e334d2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -422,6 +422,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } } +static OpArg SwapImmediate(int accessSize, OpArg reg_value) +{ + if (accessSize == 32) + return Imm32(Common::swap32((u32)reg_value.offset)); + else if (accessSize == 16) + return Imm16(Common::swap16((u16)reg_value.offset)); + else + return Imm8((u8)reg_value.offset); +} + u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { u8* result = GetWritableCodePtr(); @@ -429,14 +439,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce if (reg_value.IsImm()) { if (swap) - { - if (accessSize == 32) - reg_value = Imm32(Common::swap32((u32)reg_value.offset)); - else if (accessSize == 16) - reg_value = Imm16(Common::swap16((u16)reg_value.offset)); - else - reg_value = Imm8((u8)reg_value.offset); - } + reg_value = SwapImmediate(accessSize, reg_value); MOV(accessSize, dest, reg_value); } else if (swap) @@ -461,6 +464,68 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce return result; } +void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize) +{ + // No need to protect these, they don't touch any state + // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat + switch (accessSize) + { + case 8: + CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite8); + break; + case 16: + CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite16); + break; + case 32: + CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite32); + break; + } + jit->js.fifoBytesThisBlock += accessSize >> 3; +} + +bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, BitSet32 registersInUse) +{ + // If we already know the address through constant folding, we can do some + // fun tricks... + if ((address & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe && accessSize <= 32) + { + if (!arg.IsSimpleReg() || arg.GetSimpleReg() != RSCRATCH) + MOV(32, R(RSCRATCH), arg); + + UnsafeWriteGatherPipe(accessSize); + return false; + } + else if (Memory::IsRAMAddress(address)) + { + WriteToConstRamAddress(accessSize, arg, address); + return false; + } + else + { + // Helps external systems know which instruction triggered the write + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); + + ABI_PushRegistersAndAdjustStack(registersInUse, 0); + switch (accessSize) + { + case 64: + ABI_CallFunctionAC((void *)&Memory::Write_U64, arg, address); + break; + case 32: + ABI_CallFunctionAC((void *)&Memory::Write_U32, arg, address); + break; + case 16: + ABI_CallFunctionAC((void *)&Memory::Write_U16, arg, address); + break; + case 8: + ABI_CallFunctionAC((void *)&Memory::Write_U8, arg, address); + break; + } + ABI_PopRegistersAndAdjustStack(registersInUse, 0); + return true; + } +} + void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags) { // set the correct immediate format @@ -565,20 +630,30 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces SetJumpTarget(exit); } -// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here -void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags) +void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap) { - // TODO: PSHUFB might be faster if fastmem supported MOVSS. - MOVD_xmm(R(RSCRATCH), xmm_value); - SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags); -} + X64Reg reg; + if (arg.IsImm()) + { + arg = SwapImmediate(accessSize, arg); + MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg); + return; + } -void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap) -{ - if (swap) - SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg); + if (!arg.IsSimpleReg() || (!cpu_info.bMOVBE && swap && arg.GetSimpleReg() != RSCRATCH)) + { + MOV(accessSize, R(RSCRATCH), arg); + reg = RSCRATCH; + } else - MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg)); + { + reg = arg.GetSimpleReg(); + } + + if (swap) + SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), reg); + else + MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(reg)); } void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 68f3ced898..3487fb374f 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -87,6 +87,7 @@ public: return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap); } u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend); + void UnsafeWriteGatherPipe(int accessSize); // Generate a load/write from the MMIO handler for a given address. Only // call for known addresses in MMIO range (MMIO::IsMMIOAddress). @@ -116,9 +117,9 @@ public: return swap && !cpu_info.bMOVBE && accessSize > 8; } - void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags = 0); - - void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); + void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true); + // returns true if an exception could have been caused + bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse); void JitGetAndClearCAOV(bool oe); void JitSetCA(); void JitSetCAIf(Gen::CCFlags conditionCode);