diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj index 03334470df..3a7e322e21 100644 --- a/Source/Core/Core/Core.vcproj +++ b/Source/Core/Core/Core.vcproj @@ -896,6 +896,14 @@ RelativePath=".\Src\PowerPC\Jit64\Jit_SystemRegisters.cpp" > + + + + @@ -1162,6 +1170,10 @@ RelativePath=".\Src\PatchEngine.h" > + + diff --git a/Source/Core/Core/Src/HW/CommandProcessor.cpp b/Source/Core/Core/Src/HW/CommandProcessor.cpp index 8c3c6d9a01..bf7b613880 100644 --- a/Source/Core/Core/Src/HW/CommandProcessor.cpp +++ b/Source/Core/Core/Src/HW/CommandProcessor.cpp @@ -329,7 +329,7 @@ void Write32(const u32 _Data, const u32 _Address) void GatherPipeBursted() { - // we arn't linked, so we don't care about gather pipe data + // if we aren't linked, we don't care about gather pipe data if (!fifo.bFF_GPLinkEnable) return; diff --git a/Source/Core/Core/Src/HW/GPFifo.cpp b/Source/Core/Core/Src/HW/GPFifo.cpp index c279500f09..19ddb43484 100644 --- a/Source/Core/Core/Src/HW/GPFifo.cpp +++ b/Source/Core/Core/Src/HW/GPFifo.cpp @@ -73,7 +73,7 @@ void CheckGatherPipe() // increase the CPUWritePointer CPeripheralInterface::Fifo_CPUWritePointer += GATHER_PIPE_SIZE; if (CPeripheralInterface::Fifo_CPUWritePointer > CPeripheralInterface::Fifo_CPUEnd) - _assert_msg_(DYNA_REC, 0, "ARGH"); + _assert_msg_(DYNA_REC, 0, "Fifo_CPUWritePointer out of bounds"); if (CPeripheralInterface::Fifo_CPUWritePointer >= CPeripheralInterface::Fifo_CPUEnd) CPeripheralInterface::Fifo_CPUWritePointer = CPeripheralInterface::Fifo_CPUBase; @@ -94,7 +94,7 @@ void Write16(const u16 _iValue, const u32 _iAddress) { // LOG(GPFIFO, "GPFIFO #%x: 0x%04x",CPeripheralInterface::Fifo_CPUWritePointer+m_gatherPipeCount, _iValue); *(u16*)(&m_gatherPipe[m_gatherPipeCount]) = Common::swap16(_iValue); - m_gatherPipeCount+=2; + m_gatherPipeCount += 2; CheckGatherPipe(); } @@ -105,7 +105,7 @@ void Write32(const u32 _iValue, const u32 _iAddress) // LOG(GPFIFO, "GPFIFO #%x: 0x%08x / %f",CPeripheralInterface::Fifo_CPUWritePointer+m_gatherPipeCount, _iValue, floatvalue); #endif *(u32*)(&m_gatherPipe[m_gatherPipeCount]) = Common::swap32(_iValue); - m_gatherPipeCount+=4; + m_gatherPipeCount += 4; CheckGatherPipe(); } diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index 87eae61765..c7aae3a78d 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -878,7 +878,7 @@ u8 *GetPointer(const u32 _Address) } -bool IsRAMAddress(const u32 addr) +bool IsRAMAddress(const u32 addr, bool allow_locked_cache) { switch ((addr >> 24) & 0xFC) { case 0x00: @@ -896,7 +896,7 @@ bool IsRAMAddress(const u32 addr) else return false; case 0xE0: - if (addr - 0xE0000000 < L1_CACHE_SIZE) + if (allow_locked_cache && addr - 0xE0000000 < L1_CACHE_SIZE) return true; else return false; diff --git a/Source/Core/Core/Src/HW/Memmap.h b/Source/Core/Core/Src/HW/Memmap.h index 62c1b5b6a2..5fcbb8dfe0 100644 --- a/Source/Core/Core/Src/HW/Memmap.h +++ b/Source/Core/Core/Src/HW/Memmap.h @@ -77,7 +77,7 @@ namespace Memory void InitHWMemFuncsWii(); u32 Read_Instruction(const u32 _Address); - bool IsRAMAddress(const u32 addr); + bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false); writeFn32 GetHWWriteFun32(const u32 _Address); inline u8* GetCachePtr() {return m_pL1Cache;} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index a59bf1f13e..84d7f6a20b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -20,12 +20,14 @@ #include "Common.h" #include "x64Emitter.h" #include "ABI.h" +#include "Thunk.h" #include "../../HLE/HLE.h" #include "../../CoreTiming.h" #include "../PowerPC.h" #include "../PPCTables.h" #include "../PPCAnalyst.h" #include "../../HW/Memmap.h" +#include "../../HW/GPFifo.h" #include "Jit.h" #include "JitAsm.h" #include "JitCache.h" @@ -294,8 +296,15 @@ namespace Jit64 been_here[PC] = 1; } + void Cleanup() + { + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + CALL((void *)&GPFifo::CheckGatherPipe); + } + void WriteExit(u32 destination, int exit_num) { + Cleanup(); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) @@ -321,6 +330,7 @@ namespace Jit64 void WriteExitDestInEAX(int exit_num) { MOV(32, M(&PC), R(EAX)); + Cleanup(); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); JMP(Asm::dispatcher, true); } @@ -328,12 +338,14 @@ namespace Jit64 void WriteRfiExitDestInEAX() { MOV(32, M(&PC), R(EAX)); + Cleanup(); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); JMP(Asm::testExceptions, true); } void WriteExceptionExit(u32 exception) { + Cleanup(); OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception)); MOV(32, M(&PC), Imm32(js.compilerPC + 4)); JMP(Asm::testExceptions, true); @@ -396,6 +408,11 @@ namespace Jit64 // Default(ops[i].inst); gpr.SanityCheck(); fpr.SanityCheck(); + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + { + js.fifoBytesThisBlock -= 32; + CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); + } } js.compilerPC += 4; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index b1a9c38d16..d83a15861d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -69,6 +69,7 @@ namespace Jit64 bool enableBlocklink; bool fpAccurateFlags; bool enableFastMem; + bool optimizeGatherPipe; }; extern JitState js; @@ -84,11 +85,6 @@ namespace Jit64 void HLEFunction(UGeckoInstruction _inst); - void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); - void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0); - void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset); - void addx(UGeckoInstruction inst); void orx(UGeckoInstruction inst); void andx(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index bc180a6512..3d6592e330 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -29,6 +29,7 @@ #include "JitCache.h" #include "../../HW/CPUCompare.h" +#include "../../HW/GPFifo.h" #include "../../Core.h" using namespace Gen; @@ -36,6 +37,7 @@ int blocksExecuted; namespace Jit64 { + namespace Asm { const u8 *enterCode; @@ -47,6 +49,11 @@ const u8 *dispatcherNoCheck; const u8 *dispatcherPcInEAX; const u8 *computeRc; +const u8 *fifoDirectWrite8; +const u8 *fifoDirectWrite16; +const u8 *fifoDirectWrite32; +const u8 *fifoDirectWriteFloat; + static bool blockMode = false; //doesn't work as true! bool compareEnabled = false; @@ -73,6 +80,8 @@ static bool enableStatistics = false; // At this offset - 4, there is an int specifying the block number. +void GenerateCommon(); + #ifdef _M_IX86 void Generate() { @@ -167,36 +176,7 @@ void Generate() POP(EBP); RET(); - computeRc = AlignCode16(); - AND(32, M(&CR), Imm32(0x0FFFFFFF)); - CMP(32, R(EAX), Imm8(0)); - FixupBranch pLesser = J_CC(CC_L); - FixupBranch pGreater = J_CC(CC_G); - - OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0 - RET(); - - SetJumpTarget(pGreater); - OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0 - RET(); - - SetJumpTarget(pLesser); - OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0 - RET(); - - // Fast write routines - special case the most common hardware write - // TODO: use this. - // Even in x86, the param values will be in the right registers. - /* - const u8 *fastMemWrite8 = AlignCode16(); - CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); - FixupBranch skip_fast_write = J_CC(CC_NE, false); - MOV(32, EAX, M(&m_gatherPipeCount)); - MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); - ADD(32, 1, M(&m_gatherPipeCount)); - RET(); - SetJumpTarget(skip_fast_write); - CALL((void *)&Memory::Write_U8);*/ + GenerateCommon(); } #elif defined(_M_X64) @@ -271,7 +251,7 @@ void Generate() CALL((void *)&CoreTiming::Advance); testExceptions = GetCodePtr(); - TEST(32,M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF)); + TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF)); FixupBranch skipExceptions = J_CC(CC_Z); MOV(32, R(EAX), M(&PC)); MOV(32, M(&NPC), R(EAX)); @@ -287,12 +267,59 @@ void Generate() ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); + GenerateCommon(); +} +#endif + +void GenFifoWrite(int size) +{ + // Assume value in ABI_PARAM1 + PUSH(ESI); + if (size != 32) + PUSH(EDX); + BSWAP(size, ABI_PARAM1); + MOV(32, R(EAX), Imm32((u32)GPFifo::m_gatherPipe)); + MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); + if (size != 32) { + MOV(32, R(EDX), R(ABI_PARAM1)); + MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX)); + } else { + MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1)); + } + ADD(32, R(ESI), Imm8(size >> 3)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); + if (size != 32) + POP(EDX); + POP(ESI); + RET(); +} + +static int temp32; +void GenFifoFloatWrite() +{ + // Assume value in XMM0 + PUSH(ESI); + PUSH(EDX); + MOVSS(M(&temp32), XMM0); + MOV(32, R(EDX), M(&temp32)); + BSWAP(32, EDX); + MOV(32, R(EAX), Imm32((u32)GPFifo::m_gatherPipe)); + MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); + MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX)); + ADD(32, R(ESI), Imm8(4)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); + POP(EDX); + POP(ESI); + RET(); +} + +void GenerateCommon() +{ computeRc = AlignCode16(); AND(32, M(&CR), Imm32(0x0FFFFFFF)); CMP(32, R(EAX), Imm8(0)); FixupBranch pLesser = J_CC(CC_L); FixupBranch pGreater = J_CC(CC_G); - OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0 RET(); SetJumpTarget(pGreater); @@ -302,68 +329,30 @@ void Generate() OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0 RET(); -/* - const u8 *end = GetCodePtr(); - - - u8 *xDis = new u8[65536]; - memset(xDis,0,65536); - - disassembler x64disasm; - - x64disasm.set_syntax_intel(); - u64 disasmPtr = (u64)enterCode; - int size = end-enterCode; - char *sptr = (char*)xDis; - - while ((u8*)disasmPtr < end) - { - disasmPtr += x64disasm.disasm64(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr); - sptr += strlen(sptr); - *sptr++ = 13; - *sptr++ = 10; - } - MessageBox(0,(char*)xDis,"yo",0); - delete [] xDis; */ - + fifoDirectWrite8 = AlignCode4(); + GenFifoWrite(8); + fifoDirectWrite16 = AlignCode4(); + GenFifoWrite(16); + fifoDirectWrite32 = AlignCode4(); + GenFifoWrite(32); + fifoDirectWriteFloat = AlignCode4(); + GenFifoFloatWrite(); + // Fast write routines - special case the most common hardware write + // TODO: use this. + // Even in x86, the param values will be in the right registers. /* - RUNTIME_FUNCTION func; - func.BeginAddress = 0; - func.EndAddress = (u32)(GetCodePtr() - enterCode); - func.UnwindData = 0; - - RtlAddFunctionTable(&func, 1, (ULONGLONG)enterCode);*/ - /* - //we only want to do this once - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - //TODO: Also preserve XMM0-3? - SUB(64, R(RSP), Imm8(0x20)); - - MOV(32, R(R15), M(&Memory::base)); - - - - - MOV(32, M(&PowerPC::ppcState.pc), R(R14)); - - //Landing pad for drec space - ADD(64, R(RSP), Imm8(0x20)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RDI); - POP(RSI); - POP(RBX); - RET();*/ -} -#endif -} + const u8 *fastMemWrite8 = AlignCode16(); + CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); + FixupBranch skip_fast_write = J_CC(CC_NE, false); + MOV(32, EAX, M(&m_gatherPipeCount)); + MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); + ADD(32, 1, M(&m_gatherPipeCount)); + RET(); + SetJumpTarget(skip_fast_write); + CALL((void *)&Memory::Write_U8);*/ } +} // namespace Asm + +} // namespace Jit64 + diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h index 4a1ea0c2aa..88c3a25053 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h @@ -34,6 +34,11 @@ namespace Jit64 extern const u8 *dispatchPcInEAX; extern const u8 *doTiming; + extern const u8 *fifoDirectWrite8; + extern const u8 *fifoDirectWrite16; + extern const u8 *fifoDirectWrite32; + extern const u8 *fifoDirectWriteFloat; + extern bool compareEnabled; void Generate(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp index eae1d6e346..6cbece1004 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp @@ -85,6 +85,7 @@ namespace Jit64 #endif jo.assumeFPLoadFromMem = true; jo.fpAccurateFlags = true; + jo.optimizeGatherPipe = true; codeCache = (u8*)AllocateExecutableMemory(CODE_SIZE); genFunctions = (u8*)AllocateExecutableMemory(GEN_SIZE); @@ -260,7 +261,7 @@ namespace Jit64 } int GetCodeSize() { - return GetCodePtr() - codeCache; + return (int)(GetCodePtr() - codeCache); } //Block linker @@ -369,6 +370,7 @@ namespace Jit64 void ClearCache() { + Core::DisplayMessage("Cleared code cache.", 3000); // Is destroying the blocks really necessary? for (int i = 0; i < numBlocks; i++) { DestroyBlock(i, false); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 41402f1fa9..b53ca8fd71 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -556,13 +556,23 @@ namespace Jit64 int s = inst.RS; if (gpr.R(a).IsImm() || gpr.R(s).IsImm()) { + if (gpr.R(s).IsImm()) + { + if (gpr.R(s).offset == 0 && !inst.Rc) { + // This is pretty common for some reason + gpr.LoadToX64(a, false); + XOR(32, gpr.R(a), gpr.R(a)); + return; + } + // This might also be worth doing. + } Default(inst); return; } if (a != s) { - gpr.Lock(a,s); + gpr.Lock(a, s); gpr.LoadToX64(a, false); MOV(32, gpr.R(a), gpr.R(s)); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index 7701d16e96..cf82e2f24d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -35,6 +35,7 @@ #include "JitCache.h" #include "JitAsm.h" #include "JitRegCache.h" +#include "Jit_Util.h" // #define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START @@ -47,77 +48,10 @@ namespace Jit64 { - static u64 GC_ALIGNED16(temp64); - static u32 GC_ALIGNED16(temp32); - - void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) - { -#ifdef _M_IX86 - AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); - MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset)); -#else - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); -#endif - if (accessSize == 32) - { - BSWAP(32, EAX); - } - else if (accessSize == 16) - { - BSWAP(32, EAX); - SHR(32, R(EAX), Imm8(16)); - } - if (signExtend && accessSize < 32) { - MOVSX(32, accessSize, EAX, R(EAX)); - } + namespace { + u64 GC_ALIGNED16(temp64); + u32 GC_ALIGNED16(temp32); } - - void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend) - { - if (offset) - ADD(32, R(reg), Imm32((u32)offset)); - TEST(32, R(reg), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); - UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - switch (accessSize) - { - case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break; - case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break; - case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break; - } - SetJumpTarget(arg2); - } - - void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) - { - if (accessSize != 32) { - PanicAlert("UnsafeWriteRegToReg can't handle %i byte accesses", accessSize); - } - BSWAP(32, reg_value); -#ifdef _M_IX86 - AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); -#else - MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); -#endif - } - - // Destroys both arg registers - void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) - { - if (offset) - ADD(32, R(reg_addr), Imm32(offset)); - TEST(32, R(reg_addr), Imm32(0x0C000000)); - FixupBranch unsafe_addr = J_CC(CC_NZ); - UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0); - FixupBranch skip_call = J(); - SetJumpTarget(unsafe_addr); - ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); - SetJumpTarget(skip_call); - } - void lbzx(UGeckoInstruction inst) { INSTRUCTION_START; @@ -272,73 +206,58 @@ namespace Jit64 case 38: accessSize = 8; break; //stb default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; } -/* + if (gpr.R(a).IsImm() && !update) { u32 addr = (u32)gpr.R(a).offset; addr += offset; - //YAY! - //Now do something smart - if ((addr & 0xFFFFF000) == 0xCC008000) + if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { - //MessageBox(0,"FIFO",0,0); - //Do a direct I/O write -#ifdef _M_X64 - MOV(32, R(EDX), Imm32((u32)gpr.R(a).offset)); - MOV(32, R(ECX), gpr.R(s)); -#elif _M_IX86 - PUSH(32, Imm32((u32)gpr.R(a).offset)); - PUSH(32, gpr.R(s)); -#endif + gpr.FlushLockX(ABI_PARAM1); + MOV(32, R(ABI_PARAM1), gpr.R(s)); + // INT3(); switch (accessSize) { - case 8: CALL((void *)&GPFifo::FastWrite8); break; - case 16: CALL((void *)&GPFifo::FastWrite16); break; - case 32: CALL((void *)&GPFifo::FastWrite32); break; + // No need to protect these, they don't touch any state + case 8: CALL((void *)Asm::fifoDirectWrite8); break; + case 16: CALL((void *)Asm::fifoDirectWrite16); break; + case 32: CALL((void *)Asm::fifoDirectWrite32); break; } js.fifoBytesThisBlock += accessSize >> 3; - if (js.fifoBytesThisBlock > 32) - { - js.fifoBytesThisBlock -= 32; - CALL((void *)&GPFifo::CheckGatherPipe); - } -#ifdef _M_IX86 - ADD(32, R(ESP), Imm8(8)); -#endif + gpr.UnlockAllX(); return; } - else if ((addr>>24) == 0xCC && accessSize == 32) //Other I/O + else if (Memory::IsRAMAddress(addr) && accessSize == 32) { -#ifdef _M_X64 - MOV(32, R(EDX), Imm32((u32)gpr.R(a).offset)); - MOV(32, R(ECX), gpr.R(s)); -#elif _M_IX86 - PUSH(32, Imm32((u32)gpr.R(a).offset)); - PUSH(32, gpr.R(s)); -#endif - CALL((void *)Memory::GetHWWriteFun32(addr)); -#ifdef _M_IX86 - ADD(32, R(ESP), Imm8(8)); -#endif + MOV(accessSize, R(EAX), gpr.R(s)); + BSWAP(accessSize, EAX); + WriteToConstRamAddress(accessSize, R(EAX), addr); + return; + // PanicAlert("yum yum"); + // This may be quite beneficial. } + // Other IO not worth the trouble. } + + // Optimized stack access? if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack) //Zelda does not like this { - //Stack access - MOV(32, R(ECX), gpr.R(a)); + gpr.FlushLockX(ABI_PARAM1); + MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(EAX), gpr.R(s)); BSWAP(32, EAX); #ifdef _M_X64 - MOV(accessSize, MComplex(RBX, ECX, SCALE_1, (u32)offset), R(EAX)); + MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX)); #elif _M_IX86 AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(ECX, (u32)Memory::base + (u32)offset), R(EAX)); + MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); #endif if (update) ADD(32, gpr.R(a), Imm32(offset)); + gpr.UnlockAllX(); return; } -*/ + //Still here? Do regular path. gpr.Lock(s, a); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); @@ -394,8 +313,8 @@ namespace Jit64 /* /// BUGGY //return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16; - gpr.Flush(FLUSH_ALL); - gpr.LockX(ECX, EDX, ESI); + gpr.FlushLockX(ECX, EDX); + gpr.FlushLockX(ESI); //INT3(); MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index d201c959ba..e5440bf829 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -35,6 +35,7 @@ #include "JitCache.h" #include "JitAsm.h" #include "JitRegCache.h" +#include "Jit_Util.h" // #define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START @@ -55,9 +56,10 @@ const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0}; const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; -static u64 GC_ALIGNED16(temp64); -static u32 GC_ALIGNED16(temp32); - +namespace { +u64 GC_ALIGNED16(temp64); +u32 GC_ALIGNED16(temp32); +} // TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common, // and pshufb could help a lot. // Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves. @@ -178,29 +180,50 @@ void stfs(UGeckoInstruction inst) int s = inst.RS; int a = inst.RA; s32 offset = (s32)(s16)inst.SIMM_16; - if (a && !update) - { - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - gpr.Lock(a); - fpr.Lock(s); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - ADD(32, R(ABI_PARAM2), Imm32(offset)); - if (update && offset) - { - MOV(32, gpr.R(a), R(ABI_PARAM2)); - } - CVTSD2SS(XMM0, fpr.R(s)); - MOVSS(M(&temp32), XMM0); - MOV(32, R(ABI_PARAM1), M(&temp32)); - SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0); - gpr.UnlockAll(); - gpr.UnlockAllX(); - fpr.UnlockAll(); - } - else - { + if (!a || update) { Default(inst); + return; } + + if (gpr.R(a).IsImm()) + { + u32 addr = gpr.R(a).offset + offset; + if (Memory::IsRAMAddress(addr)) + { + if (cpu_info.bSSSE3) { + CVTSD2SS(XMM0, fpr.R(s)); + PSHUFB(XMM0, M((void *)bswapShuffle1x4)); + WriteFloatToConstRamAddress(XMM0, addr); + return; + } + } + else if (addr == 0xCC008000) + { + // Float directly to write gather pipe! Fun! + CVTSD2SS(XMM0, fpr.R(s)); + CALL((void*)Asm::fifoDirectWriteFloat); + // TODO + js.fifoBytesThisBlock += 4; + return; + } + } + + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + gpr.Lock(a); + fpr.Lock(s); + MOV(32, R(ABI_PARAM2), gpr.R(a)); + ADD(32, R(ABI_PARAM2), Imm32(offset)); + if (update && offset) + { + MOV(32, gpr.R(a), R(ABI_PARAM2)); + } + CVTSD2SS(XMM0, fpr.R(s)); + MOVSS(M(&temp32), XMM0); + MOV(32, R(ABI_PARAM1), M(&temp32)); + SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0); + gpr.UnlockAll(); + gpr.UnlockAllX(); + fpr.UnlockAll(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 567cad3f7d..6aca9b5f46 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -108,21 +108,67 @@ void psq_st(UGeckoInstruction inst) Default(inst); return; } + if (!inst.RA) + { + // This really should never happen. Unless we change this to also support stwux + Default(inst); + return; + } const UGQR gqr(rSPR(SPR_GQR0 + inst.I)); const EQuantizeType stType = static_cast(gqr.ST_TYPE); int stScale = gqr.ST_SCALE; bool update = inst.OPCD == 61; - if (!inst.RA || inst.W) - { - // PanicAlert(inst.RA ? "W" : "inst"); - Default(inst); - return; - } int offset = inst.SIMM_12; int a = inst.RA; int s = inst.RS; // Fp numbers + if (inst.W) { + // PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update); + // It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only + // floats so that's what we'll work on. + switch (stType) + { + case QUANTIZE_FLOAT: + { + if (gpr.R(a).IsImm()) + { + PanicAlert("Imm: %08x", gpr.R(a).offset); + } + DISABLE_32BIT; + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + gpr.Lock(a); + fpr.Lock(s); + if (update) + gpr.LoadToX64(a, true, true); + MOV(32, R(ABI_PARAM2), gpr.R(a)); + if (offset) + ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); + TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); + if (update && offset) + MOV(32, gpr.R(a), R(ABI_PARAM2)); + CVTSD2SS(XMM0, fpr.R(s)); + MOVD_xmm(M(&temp64), XMM0); + MOV(32, R(ABI_PARAM1), M(&temp64)); + FixupBranch argh = J_CC(CC_NZ); + BSWAP(32, ABI_PARAM1); + MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); + FixupBranch skip_call = J(); + SetJumpTarget(argh); + CALL(ProtectFunction((void *)&Memory::Write_U32, 2)); + SetJumpTarget(skip_call); + gpr.UnlockAll(); + gpr.UnlockAllX(); + fpr.UnlockAll(); + return; + } + default: + Default(inst); + return; + } + return; + } + if (stType == QUANTIZE_FLOAT) { DISABLE_32BIT; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp index 77e749f9f0..d171079f41 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp @@ -43,9 +43,9 @@ namespace Jit64 { - static const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; - static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; - static const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; + const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; + const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; + const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; void ps_sign(UGeckoInstruction inst) { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.cpp new file mode 100644 index 0000000000..21987b6a55 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.cpp @@ -0,0 +1,127 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "Thunk.h" + +#include "../PowerPC.h" +#include "../../Core.h" +#include "../../HW/GPFifo.h" +#include "../../HW/CommandProcessor.h" +#include "../../HW/PixelEngine.h" +#include "../../HW/Memmap.h" +#include "../PPCTables.h" +#include "x64Emitter.h" +#include "ABI.h" + +#include "Jit.h" +#include "JitCache.h" +#include "JitAsm.h" +#include "JitRegCache.h" + + +namespace Jit64 +{ + +void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) +{ +#ifdef _M_IX86 + AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); + MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset)); +#else + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); +#endif + if (accessSize == 32) + { + BSWAP(32, EAX); + } + else if (accessSize == 16) + { + BSWAP(32, EAX); + SHR(32, R(EAX), Imm8(16)); + } + if (signExtend && accessSize < 32) { + MOVSX(32, accessSize, EAX, R(EAX)); + } +} + +void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend) +{ + if (offset) + ADD(32, R(reg), Imm32((u32)offset)); + TEST(32, R(reg), Imm32(0x0C000000)); + FixupBranch argh = J_CC(CC_NZ); + UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend); + FixupBranch arg2 = J(); + SetJumpTarget(argh); + switch (accessSize) + { + case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break; + case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break; + case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break; + } + SetJumpTarget(arg2); +} + +void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) +{ + if (accessSize != 32) { + PanicAlert("UnsafeWriteRegToReg can't handle %i byte accesses", accessSize); + } + BSWAP(32, reg_value); +#ifdef _M_IX86 + AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); + MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); +#else + MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); +#endif +} + +// Destroys both arg registers +void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) +{ + if (offset) + ADD(32, R(reg_addr), Imm32(offset)); + TEST(32, R(reg_addr), Imm32(0x0C000000)); + FixupBranch unsafe_addr = J_CC(CC_NZ); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0); + FixupBranch skip_call = J(); + SetJumpTarget(unsafe_addr); + ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); + SetJumpTarget(skip_call); +} + +void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address) +{ +#ifdef _M_X64 + MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); +#else + MOV(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), arg); +#endif +} + +void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address) +{ +#ifdef _M_X64 + MOV(32, R(RAX), Imm32(address)); + MOVSS(MComplex(RBX, RAX, 1, 0), xmm_reg); +#else + MOVSS(M((void*)((u32)Memory::base + (address & Memory::MEMVIEW32_MASK))), xmm_reg); +#endif +} + +} // namespace diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.h new file mode 100644 index 0000000000..7a7800c5c1 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Util.h @@ -0,0 +1,33 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + + +#include "x64Emitter.h" + +namespace Jit64 +{ + +// Memory Load/Store +void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); +void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0); +void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); +void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset); + +void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); +void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address); + +} // namespace \ No newline at end of file diff --git a/Source/Core/Core/Src/SConscript b/Source/Core/Core/Src/SConscript index 3fa837142f..999d94b294 100644 --- a/Source/Core/Core/Src/SConscript +++ b/Source/Core/Core/Src/SConscript @@ -76,6 +76,7 @@ files = ["Console.cpp", "PowerPC/Jit64/Jit_LoadStore.cpp", "PowerPC/Jit64/Jit_LoadStoreFloating.cpp", "PowerPC/Jit64/Jit_SystemRegisters.cpp", + "PowerPC/Jit64/Jit_Util.cpp", "HLE/HLE.cpp", "HLE/HLE_Misc.cpp", "HLE/HLE_OS.cpp", diff --git a/Source/Core/DebuggerWX/src/CodeWindow.cpp b/Source/Core/DebuggerWX/src/CodeWindow.cpp index 508cc62c2c..76fc8c60b6 100644 --- a/Source/Core/DebuggerWX/src/CodeWindow.cpp +++ b/Source/Core/DebuggerWX/src/CodeWindow.cpp @@ -40,6 +40,8 @@ #include "Debugger/PPCDebugInterface.h" #include "Debugger/Debugger_SymbolMap.h" #include "PowerPC/PPCAnalyst.h" +#include "PowerPC/Jit64/Jit.h" +#include "PowerPC/Jit64/JitCache.h" #include "Core.h" #include "LogManager.h" @@ -235,6 +237,13 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam pSymbolsMenu->Append(IDM_SCANFUNCTIONS, _T("&Scan for functions")); pMenuBar->Append(pSymbolsMenu, _T("&Symbols")); } + + { + wxMenu *pJitMenu = new wxMenu; + pJitMenu->Append(IDM_CLEARCODECACHE, _T("&Clear code cache")); + pMenuBar->Append(pJitMenu, _T("&JIT")); + } + SetMenuBar(pMenuBar); } @@ -256,6 +265,16 @@ void CCodeWindow::JumpToAddress(u32 _Address) codeview->Center(_Address); } +void CCodeWindow::OnJitMenu(wxCommandEvent& event) +{ + switch (event.GetId()) + { + case IDM_CLEARCODECACHE: + Jit64::ClearCache(); + break; + } +} + void CCodeWindow::OnSymbolsMenu(wxCommandEvent& event) { if (Core::GetState() == Core::CORE_UNINITIALIZED) diff --git a/Source/Core/DebuggerWX/src/CodeWindow.h b/Source/Core/DebuggerWX/src/CodeWindow.h index c3b7ba12e7..8d8684f736 100644 --- a/Source/Core/DebuggerWX/src/CodeWindow.h +++ b/Source/Core/DebuggerWX/src/CodeWindow.h @@ -81,6 +81,7 @@ class CCodeWindow IDM_SCANFUNCTIONS, IDM_LOADMAPFILE, IDM_SAVEMAPFILE, + IDM_CLEARCODECACHE, }; enum @@ -125,6 +126,7 @@ class CCodeWindow void OnToggleMemoryWindow(wxCommandEvent& event); void OnHostMessage(wxCommandEvent& event); void OnSymbolsMenu(wxCommandEvent& event); + void OnJitMenu(wxCommandEvent& event); void CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParameter); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp b/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp index 8dc62e8820..1fc678a805 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp @@ -147,8 +147,8 @@ bool OpenGL_Create(SVideoInitialize &_VideoInitialize, int _iwidth, int _iheight nBackbufferHeight = _theight; // change later - s_nTargetWidth = 640<