diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index a8440d920b..d30666121c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -158,7 +158,6 @@ ps_adds1 */ Jit64 jit; -PPCAnalyst::CodeBuffer code_buffer(32000); int CODE_SIZE = 1024*1024*16; @@ -167,6 +166,11 @@ namespace CPUCompare extern u32 m_BlockStart; } + void Jit(u32 em_address) + { + jit.Jit(em_address); + } + void Jit64::Init() { if (Core::g_CoreStartupParameter.bJITUnlimitedCache) @@ -206,12 +210,6 @@ namespace CPUCompare asm_routines.Shutdown(); } - void Jit64::EnterFastRun() - { - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); - //Will return when PowerPC::state changes - } void Jit64::WriteCallInterpreter(UGeckoInstruction inst) { @@ -343,7 +341,25 @@ namespace CPUCompare JMP(asm_routines.testExceptions, true); } - const u8 *Jit64::Jit(u32 em_address) + void Jit64::Run() + { + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + //Will return when PowerPC::state changes + } + + void Jit64::SingleStep() + { + // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET + + JitBlock temp_block; + PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! + const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); + CompiledCode pExecAddr = (CompiledCode)code; + pExecAddr(); + } + + void Jit64::Jit(u32 em_address) { if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) { @@ -354,35 +370,33 @@ namespace CPUCompare } ClearCache(); } - - return blocks.Jit(em_address); + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); } - const u8* Jit64::DoJit(u32 em_address, JitBlock &b) + const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) { if (em_address == 0) PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); -// if (em_address == 0x800aa278) -// DebugBreak(); - int size; js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; - js.curBlock = &b; + js.curBlock = b; js.blockSetsQuantizers = false; js.block_flags = 0; js.cancel = false; //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, &code_buffer); - PPCAnalyst::CodeOp *ops = code_buffer.codebuffer; + PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer); + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr - b.checkedEntry = start; - b.runCount = 0; + b->checkedEntry = start; + b->runCount = 0; // Downcount flag check. The last block decremented downcounter, and the flag should still be available. FixupBranch skip = J_CC(CC_NBE); @@ -417,11 +431,11 @@ namespace CPUCompare // Conditionally add profiling code. if (Profiler::g_ProfileBlocks) { - ADD(32, M(&b.runCount), Imm8(1)); + ADD(32, M(&b->runCount), Imm8(1)); #ifdef _WIN32 - b.ticCounter.QuadPart = 0; - b.ticStart.QuadPart = 0; - b.ticStop.QuadPart = 0; + b->ticCounter.QuadPart = 0; + b->ticStart.QuadPart = 0; + b->ticStop.QuadPart = 0; #else //TODO #endif @@ -445,7 +459,8 @@ namespace CPUCompare js.compilerPC = ops[i].address; js.op = &ops[i]; js.instructionNumber = i; - if (i == (int)size - 1) { + if (i == (int)size - 1) + { // WARNING - cmp->branch merging will screw this up. js.isLastInstruction = true; js.next_inst = 0; @@ -458,7 +473,9 @@ namespace CPUCompare PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart); PROFILER_VPOP; } - } else { + } + else + { // help peephole optimizations js.next_inst = ops[i + 1].inst; js.next_compilerPC = ops[i + 1].address; @@ -470,6 +487,12 @@ namespace CPUCompare CALL(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); } + // If starting from the breakpointed instruction, we don't break. + if (em_address != ops[i].address && BreakPoints::IsAddressBreakPoint(ops[i].address)) + { + + } + if (!ops[i].skip) PPCTables::CompileInstruction(ops[i].inst); @@ -479,8 +502,8 @@ namespace CPUCompare break; } - b.flags = js.block_flags; - b.codeSize = (u32)(GetCodePtr() - normalEntry); - b.originalSize = size; + b->flags = js.block_flags; + b->codeSize = (u32)(GetCodePtr() - normalEntry); + b->originalSize = size; return normalEntry; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 323c5fdd08..d02b2ccc53 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -114,8 +114,12 @@ private: GPRRegCache gpr; FPURegCache fpr; + // The default code buffer. We keep it around to not have to alloc/dealloc a + // large chunk of memory for each recompiled block. + PPCAnalyst::CodeBuffer code_buffer; + public: - Jit64() {blocks.SetJit(this);} + Jit64() : code_buffer(32000) {} ~Jit64() {} JitState js; @@ -128,8 +132,8 @@ public: // Jit! - const u8 *Jit(u32 em_address); // calls blocks.Jit, which in turn calls DoJit below after setting up a block. - const u8* DoJit(u32 em_address, JitBlock &b); + void Jit(u32 em_address); + const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); JitBlockCache *GetBlockCache() { return &blocks; } @@ -143,7 +147,9 @@ public: // Run! - void EnterFastRun(); + void Run(); + void SingleStep(); + const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, CONTEXT *ctx); #define JIT_OPCODE 0 @@ -276,7 +282,7 @@ public: extern Jit64 jit; -const u8 *Jit(u32 em_address); +void Jit(u32 em_address); #endif diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 71c72f91c3..ad7bb21e0f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -170,9 +170,20 @@ void AsmRoutineManager::Generate() #else //Landing pad for drec space ABI_PopAllCalleeSavedRegsAndAdjustStack(); - RET(); #endif - RET(); + RET(); + + breakpointBailout = GetCodePtr(); +#ifdef _M_IX86 + POP(EDI); + POP(ESI); + POP(EBX); + POP(EBP); +#else + //Landing pad for drec space + ABI_PopAllCalleeSavedRegsAndAdjustStack(); +#endif + RET(); GenerateCommon(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h index 4eac598057..74465a96dc 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h @@ -76,6 +76,8 @@ public: const u8 *fifoDirectWriteFloat; const u8 *fifoDirectWriteXmm64; + const u8 *breakpointBailout; + bool compareEnabled; }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp index 37e5392b52..3be39967bc 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp @@ -57,6 +57,12 @@ using namespace Gen; #define INVALID_EXIT 0xFFFFFFFF +bool JitBlock::ContainsAddress(u32 em_address) +{ + // WARNING - THIS DOES NOT WORK WITH INLINING ENABLED. + return (em_address >= originalAddress && em_address < originalAddress + originalSize); +} + bool JitBlockCache::IsFull() const { return GetNumBlocks() >= MAX_NUM_BLOCKS - 1; @@ -74,7 +80,7 @@ using namespace Gen; agent = op_open_agent(); #endif blocks = new JitBlock[MAX_NUM_BLOCKS]; - blockCodePointers = new u8*[MAX_NUM_BLOCKS]; + blockCodePointers = new const u8*[MAX_NUM_BLOCKS]; Clear(); } @@ -85,7 +91,7 @@ using namespace Gen; delete [] blockCodePointers; blocks = 0; blockCodePointers = 0; - numBlocks = 0; + num_blocks = 0; #ifdef OPROFILE_REPORT op_close_agent(agent); #endif @@ -97,18 +103,18 @@ using namespace Gen; { Core::DisplayMessage("Cleared code cache.", 3000); // Is destroying the blocks really necessary? - for (int i = 0; i < numBlocks; i++) + for (int i = 0; i < num_blocks; i++) { DestroyBlock(i, false); } links_to.clear(); - numBlocks = 0; + num_blocks = 0; memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS); } void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) { - for (int i = 0; i < numBlocks; i++) + for (int i = 0; i < num_blocks; i++) { if (blocks[i].flags & death_flag) { @@ -130,66 +136,64 @@ using namespace Gen; int JitBlockCache::GetNumBlocks() const { - return numBlocks; + return num_blocks; } bool JitBlockCache::RangeIntersect(int s1, int e1, int s2, int e2) const { // check if any endpoint is inside the other range - if ( (s1 >= s2 && s1 <= e2) || - (e1 >= s2 && e1 <= e2) || - (s2 >= s1 && s2 <= e1) || - (e2 >= s1 && e2 <= e1)) + if ((s1 >= s2 && s1 <= e2) || + (e1 >= s2 && e1 <= e2) || + (s2 >= s1 && s2 <= e1) || + (e2 >= s1 && e2 <= e1)) return true; else return false; } - const u8 *Jit(u32 emAddress) + int JitBlockCache::AllocateBlock(u32 em_address) { - return jit.Jit(emAddress); - } - - const u8 *JitBlockCache::Jit(u32 emAddress) - { - JitBlock &b = blocks[numBlocks]; + JitBlock &b = blocks[num_blocks]; b.invalid = false; - b.originalAddress = emAddress; - b.originalFirstOpcode = Memory::ReadFast32(emAddress); + b.originalAddress = em_address; + b.originalFirstOpcode = Memory::ReadFast32(em_address); b.exitAddress[0] = INVALID_EXIT; b.exitAddress[1] = INVALID_EXIT; b.exitPtrs[0] = 0; b.exitPtrs[1] = 0; b.linkStatus[0] = false; b.linkStatus[1] = false; - - blockCodePointers[numBlocks] = (u8*)jit->DoJit(emAddress, b); //cast away const - Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | numBlocks, emAddress); + num_blocks++; //commit the current block + return num_blocks - 1; + } - if (jit->jo.enableBlocklink) { - for (int i = 0; i < 2; i++) { - if (b.exitAddress[i] != INVALID_EXIT) { - links_to.insert(std::pair(b.exitAddress[i], numBlocks)); - } + void JitBlockCache::FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr) + { + blockCodePointers[block_num] = code_ptr; + JitBlock &b = blocks[block_num]; + Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | block_num, blocks[block_num].originalAddress); + if (block_link) + { + for (int i = 0; i < 2; i++) + { + if (b.exitAddress[i] != INVALID_EXIT) + links_to.insert(std::pair(b.exitAddress[i], block_num)); } - LinkBlock(numBlocks); - LinkBlockExits(numBlocks); + LinkBlock(block_num); + LinkBlockExits(block_num); } #ifdef OPROFILE_REPORT char buf[100]; sprintf(buf, "EmuCode%x", emAddress); - u8* blockStart = blockCodePointers[numBlocks], *blockEnd = GetWritableCodePtr(); + u8* blockStart = blockCodePointers[block_num], *blockEnd = GetWritableCodePtr(); op_write_native_code(agent, buf, (uint64_t)blockStart, blockStart, blockEnd - blockStart); #endif - - numBlocks++; //commit the current block - return 0; } - u8 **JitBlockCache::GetCodePointers() + const u8 **JitBlockCache::GetCodePointers() { return blockCodePointers; } @@ -201,18 +205,18 @@ using namespace Gen; u32 code = Memory::ReadFast32(addr); if ((code >> 26) == JIT_OPCODE) { - //jitted code - unsigned int blockNum = code & 0x03FFFFFF; - if (blockNum >= (unsigned int)numBlocks) { + // Jitted code. + unsigned int block = code & 0x03FFFFFF; + if (block >= (unsigned int)num_blocks) { return -1; } - if (blocks[blockNum].originalAddress != addr) + if (blocks[block].originalAddress != addr) { //_assert_msg_(DYNA_REC, 0, "GetBlockFromAddress %08x - No match - This is BAD", addr); return -1; } - return blockNum; + return block; } else { @@ -220,6 +224,13 @@ using namespace Gen; } } +void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers) +{ + for (int i = 0; i < num_blocks; i++) + if (blocks[i].ContainsAddress(em_address)) + block_numbers->push_back(i); +} + u32 JitBlockCache::GetOriginalCode(u32 address) { int num = GetBlockNumberFromStartAddress(address); @@ -308,11 +319,11 @@ using namespace Gen; // TODO - make sure that the below stuff really is safe. // Spurious entrances from previously linked blocks can only come through checkedEntry - XEmitter emit((u8*)b.checkedEntry); + XEmitter emit((u8 *)b.checkedEntry); emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.JMP(asm_routines.dispatcher, true); - emit.SetCodePtr(blockCodePointers[blocknum]); + emit.SetCodePtr((u8 *)blockCodePointers[blocknum]); emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.JMP(asm_routines.dispatcher, true); } @@ -320,11 +331,11 @@ using namespace Gen; void JitBlockCache::InvalidateCodeRange(u32 address, u32 length) { - if (!jit->jo.enableBlocklink) + if (!jit.jo.enableBlocklink) return; return; //This is slow but should be safe (zelda needs it for block linking) - for (int i = 0; i < numBlocks; i++) + for (int i = 0; i < num_blocks; i++) { if (RangeIntersect(blocks[i].originalAddress, blocks[i].originalAddress + blocks[i].originalSize, address, address + length)) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.h index 2fe648b403..f75a5e1db2 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.h @@ -19,8 +19,10 @@ #define _JITCACHE_H #include +#include #include "../Gekko.h" +#include "../PPCAnalyst.h" #ifdef _WIN32 #include @@ -55,30 +57,29 @@ struct JitBlock const u8 *checkedEntry; bool invalid; int flags; -}; -class Jit64; + bool ContainsAddress(u32 em_address); +}; typedef void (*CompiledCode)(); class JitBlockCache { - Jit64 *jit; - - u8 **blockCodePointers; + const u8 **blockCodePointers; JitBlock *blocks; - int numBlocks; + int num_blocks; std::multimap links_to; - int MAX_NUM_BLOCKS; + bool RangeIntersect(int s1, int e1, int s2, int e2) const; void LinkBlockExits(int i); void LinkBlock(int i); public: JitBlockCache() {} - void SetJit(Jit64 *jit_) { jit = jit_; } - const u8* Jit(u32 emaddress); + + int AllocateBlock(u32 em_address); + void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr); void Clear(); void Init(); @@ -88,20 +89,24 @@ public: bool IsFull() const; // Code Cache - JitBlock *GetBlock(int no); + JitBlock *GetBlock(int block_num); int GetNumBlocks() const; - u8 **GetCodePointers(); + const u8 **GetCodePointers(); // Fast way to get a block. Only works on the first ppc instruction of a block. - int GetBlockNumberFromStartAddress(u32 address); - // slower, but can get numbers from within blocks, not just the first instruction. WARNING! DOES NOT WORK WITH INLINING ENABLED (not yet a feature but will be soon) - int GetBlockNumberFromInternalAddress(u32 address); + int GetBlockNumberFromStartAddress(u32 em_address); + + // slower, but can get numbers from within blocks, not just the first instruction. + // WARNING! WILL NOT WORK WITH INLINING ENABLED (not yet a feature but will be soon) + // Returns a list of block numbers - only one block can start at a particular address, but they CAN overlap. + // This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime. + void GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers); u32 GetOriginalCode(u32 address); CompiledCode GetCompiledCodeFromBlock(int blockNumber); // DOES NOT WORK CORRECTLY WITH INLINING - void InvalidateCodeRange(u32 address, u32 length); + void InvalidateCodeRange(u32 em_address, u32 length); void DestroyBlock(int blocknum, bool invalidate); // Not currently used diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp index 9f60331fae..98a84a3960 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp @@ -49,7 +49,7 @@ void SingleStep() void Run() { - jit.EnterFastRun(); + jit.Run(); } } // namespace diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index f668bc9ee6..3cea7be00d 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -48,6 +48,7 @@ enum CodeBuffer::CodeBuffer(int size) { codebuffer = new PPCAnalyst::CodeOp[size]; + size_ = size; } CodeBuffer::~CodeBuffer() @@ -285,20 +286,20 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) // Does not yet perform inlining - although there are plans for that. bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer) { - int numCycles = 0; - u32 blockstart = address; memset(st, 0, sizeof(st)); + UGeckoInstruction previnst = Memory::Read_Instruction(address - 4); if (previnst.hex == 0x4e800020) - { st->isFirstBlockOfFunction = true; - } + gpa->any = true; fpa->any = false; - int maxsize = CODEBUFFER_SIZE; + u32 blockstart = address; + int maxsize = buffer->GetSize(); int num_inst = 0; int numFollows = 0; + int numCycles = 0; CodeOp *code = buffer->codebuffer; bool foundExit = false; diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h index 1e0a72870c..1baf77e84f 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h @@ -80,12 +80,16 @@ struct BlockRegStats class CodeBuffer { + int size_; public: CodeBuffer(int size); ~CodeBuffer(); + int GetSize() const { return size_; } + PPCAnalyst::CodeOp *codebuffer; - int size_; + + }; bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer); diff --git a/Source/Core/DebuggerWX/Src/JitWindow.cpp b/Source/Core/DebuggerWX/Src/JitWindow.cpp index 1768fd3ef9..17c83854b6 100644 --- a/Source/Core/DebuggerWX/Src/JitWindow.cpp +++ b/Source/Core/DebuggerWX/Src/JitWindow.cpp @@ -214,7 +214,8 @@ void CJitWindow::Compare(u32 em_address) ppc_box->SetValue(wxString::FromAscii((char*)xDis)); } else { - // hmmm + ppc_box->SetValue(wxString::FromAscii(StringFromFormat("(non-code address: %08x)", em_address).c_str())); + x86_box->SetValue(wxString::FromAscii("---")); }