Even more code reorganization + first step towards being able to single step directly in JIT mode.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1609 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-12-20 17:22:30 +00:00
parent 01598750b9
commit 7de995c00c
10 changed files with 165 additions and 101 deletions

View File

@ -158,7 +158,6 @@ ps_adds1
*/
Jit64 jit;
PPCAnalyst::CodeBuffer code_buffer(32000);
int CODE_SIZE = 1024*1024*16;
@ -167,6 +166,11 @@ namespace CPUCompare
extern u32 m_BlockStart;
}
// Free-function entry point for compiling a block: forwards em_address to the
// Jit() member of the global Jit64 instance `jit`. Returns nothing.
void Jit(u32 em_address)
{
jit.Jit(em_address);
}
void Jit64::Init()
{
if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
@ -206,12 +210,6 @@ namespace CPUCompare
asm_routines.Shutdown();
}
void Jit64::EnterFastRun()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
//Will return when PowerPC::state changes
}
void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
{
@ -343,7 +341,25 @@ namespace CPUCompare
JMP(asm_routines.testExceptions, true);
}
const u8 *Jit64::Jit(u32 em_address)
// Enters the generated code: reinterprets asm_routines.enterCode as a
// CompiledCode function pointer (void (*)()) and calls it.
void Jit64::Run()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
//Will return when PowerPC::state changes
}
// Experimental single-step path: asks DoJit to compile the code at the current
// PC into a local scratch block using a one-entry code buffer, then calls the
// pointer DoJit returns as a CompiledCode function.
void Jit64::SingleStep()
{
// NOT USED, NOT TESTED, PROBABLY NOT WORKING YET
// Scratch block descriptor; this function does not go through `blocks`
// (no AllocateBlock / FinalizeBlock), so nothing is recorded in the block cache here.
// NOTE(review): no field of temp_block is set before it is handed to DoJit -
// confirm DoJit never reads a member it has not written first.
JitBlock temp_block;
PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step!
const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block);
// NOTE(review): Run() enters generated code through asm_routines.enterCode,
// whereas this calls the block pointer directly - confirm a block can be
// invoked as a plain void() function.
CompiledCode pExecAddr = (CompiledCode)code;
pExecAddr();
}
void Jit64::Jit(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
{
@ -354,35 +370,33 @@ namespace CPUCompare
}
ClearCache();
}
return blocks.Jit(em_address);
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b));
}
const u8* Jit64::DoJit(u32 em_address, JitBlock &b)
const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b)
{
if (em_address == 0)
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
// if (em_address == 0x800aa278)
// DebugBreak();
int size;
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.curBlock = &b;
js.curBlock = b;
js.blockSetsQuantizers = false;
js.block_flags = 0;
js.cancel = false;
//Analyze the block, collect all instructions it is made of (including inlining,
//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, &code_buffer);
PPCAnalyst::CodeOp *ops = code_buffer.codebuffer;
PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer);
PPCAnalyst::CodeOp *ops = code_buffer->codebuffer;
const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr
b.checkedEntry = start;
b.runCount = 0;
b->checkedEntry = start;
b->runCount = 0;
// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
FixupBranch skip = J_CC(CC_NBE);
@ -417,11 +431,11 @@ namespace CPUCompare
// Conditionally add profiling code.
if (Profiler::g_ProfileBlocks) {
ADD(32, M(&b.runCount), Imm8(1));
ADD(32, M(&b->runCount), Imm8(1));
#ifdef _WIN32
b.ticCounter.QuadPart = 0;
b.ticStart.QuadPart = 0;
b.ticStop.QuadPart = 0;
b->ticCounter.QuadPart = 0;
b->ticStart.QuadPart = 0;
b->ticStop.QuadPart = 0;
#else
//TODO
#endif
@ -445,7 +459,8 @@ namespace CPUCompare
js.compilerPC = ops[i].address;
js.op = &ops[i];
js.instructionNumber = i;
if (i == (int)size - 1) {
if (i == (int)size - 1)
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
@ -458,7 +473,9 @@ namespace CPUCompare
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
PROFILER_VPOP;
}
} else {
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
@ -470,6 +487,12 @@ namespace CPUCompare
CALL(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
}
// If starting from the breakpointed instruction, we don't break.
if (em_address != ops[i].address && BreakPoints::IsAddressBreakPoint(ops[i].address))
{
}
if (!ops[i].skip)
PPCTables::CompileInstruction(ops[i].inst);
@ -479,8 +502,8 @@ namespace CPUCompare
break;
}
b.flags = js.block_flags;
b.codeSize = (u32)(GetCodePtr() - normalEntry);
b.originalSize = size;
b->flags = js.block_flags;
b->codeSize = (u32)(GetCodePtr() - normalEntry);
b->originalSize = size;
return normalEntry;
}

View File

@ -114,8 +114,12 @@ private:
GPRRegCache gpr;
FPURegCache fpr;
// The default code buffer. We keep it around to not have to alloc/dealloc a
// large chunk of memory for each recompiled block.
PPCAnalyst::CodeBuffer code_buffer;
public:
Jit64() {blocks.SetJit(this);}
Jit64() : code_buffer(32000) {}
~Jit64() {}
JitState js;
@ -128,8 +132,8 @@ public:
// Jit!
const u8 *Jit(u32 em_address); // calls blocks.Jit, which in turn calls DoJit below after setting up a block.
const u8* DoJit(u32 em_address, JitBlock &b);
void Jit(u32 em_address);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
JitBlockCache *GetBlockCache() { return &blocks; }
@ -143,7 +147,9 @@ public:
// Run!
void EnterFastRun();
void Run();
void SingleStep();
const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, CONTEXT *ctx);
#define JIT_OPCODE 0
@ -276,7 +282,7 @@ public:
extern Jit64 jit;
const u8 *Jit(u32 em_address);
void Jit(u32 em_address);
#endif

View File

@ -170,7 +170,18 @@ void AsmRoutineManager::Generate()
#else
//Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack();
#endif
RET();
breakpointBailout = GetCodePtr();
#ifdef _M_IX86
POP(EDI);
POP(ESI);
POP(EBX);
POP(EBP);
#else
//Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack();
#endif
RET();

View File

@ -76,6 +76,8 @@ public:
const u8 *fifoDirectWriteFloat;
const u8 *fifoDirectWriteXmm64;
const u8 *breakpointBailout;
bool compareEnabled;
};

View File

@ -57,6 +57,12 @@ using namespace Gen;
#define INVALID_EXIT 0xFFFFFFFF
// Reports whether em_address falls inside the half-open range
// [originalAddress, originalAddress + originalSize) recorded for this block.
// NOTE(review): DoJit stores the op count it got from Flatten in originalSize;
// if that is an instruction count rather than a byte length, this range is
// shorter than the block - confirm the unit.
bool JitBlock::ContainsAddress(u32 em_address)
{
	// WARNING - THIS DOES NOT WORK WITH INLINING ENABLED.
	if (em_address < originalAddress)
		return false;
	return em_address < originalAddress + originalSize;
}
bool JitBlockCache::IsFull() const
{
return GetNumBlocks() >= MAX_NUM_BLOCKS - 1;
@ -74,7 +80,7 @@ using namespace Gen;
agent = op_open_agent();
#endif
blocks = new JitBlock[MAX_NUM_BLOCKS];
blockCodePointers = new u8*[MAX_NUM_BLOCKS];
blockCodePointers = new const u8*[MAX_NUM_BLOCKS];
Clear();
}
@ -85,7 +91,7 @@ using namespace Gen;
delete [] blockCodePointers;
blocks = 0;
blockCodePointers = 0;
numBlocks = 0;
num_blocks = 0;
#ifdef OPROFILE_REPORT
op_close_agent(agent);
#endif
@ -97,18 +103,18 @@ using namespace Gen;
{
Core::DisplayMessage("Cleared code cache.", 3000);
// Is destroying the blocks really necessary?
for (int i = 0; i < numBlocks; i++)
for (int i = 0; i < num_blocks; i++)
{
DestroyBlock(i, false);
}
links_to.clear();
numBlocks = 0;
num_blocks = 0;
memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
}
void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag)
{
for (int i = 0; i < numBlocks; i++)
for (int i = 0; i < num_blocks; i++)
{
if (blocks[i].flags & death_flag)
{
@ -130,7 +136,7 @@ using namespace Gen;
int JitBlockCache::GetNumBlocks() const
{
return numBlocks;
return num_blocks;
}
bool JitBlockCache::RangeIntersect(int s1, int e1, int s2, int e2) const
@ -145,51 +151,49 @@ using namespace Gen;
return false;
}
const u8 *Jit(u32 emAddress)
int JitBlockCache::AllocateBlock(u32 em_address)
{
return jit.Jit(emAddress);
}
const u8 *JitBlockCache::Jit(u32 emAddress)
{
JitBlock &b = blocks[numBlocks];
JitBlock &b = blocks[num_blocks];
b.invalid = false;
b.originalAddress = emAddress;
b.originalFirstOpcode = Memory::ReadFast32(emAddress);
b.originalAddress = em_address;
b.originalFirstOpcode = Memory::ReadFast32(em_address);
b.exitAddress[0] = INVALID_EXIT;
b.exitAddress[1] = INVALID_EXIT;
b.exitPtrs[0] = 0;
b.exitPtrs[1] = 0;
b.linkStatus[0] = false;
b.linkStatus[1] = false;
blockCodePointers[numBlocks] = (u8*)jit->DoJit(emAddress, b); //cast away const
Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | numBlocks, emAddress);
if (jit->jo.enableBlocklink) {
for (int i = 0; i < 2; i++) {
if (b.exitAddress[i] != INVALID_EXIT) {
links_to.insert(std::pair<u32, int>(b.exitAddress[i], numBlocks));
}
num_blocks++; //commit the current block
return num_blocks - 1;
}
LinkBlock(numBlocks);
LinkBlockExits(numBlocks);
// Completes a block previously reserved with AllocateBlock.
//   block_num  - index of the block in `blocks` / `blockCodePointers`.
//   block_link - when true, the block's exits are registered in links_to and
//                LinkBlock / LinkBlockExits are run for it.
//   code_ptr   - pointer to the compiled code, stored in blockCodePointers.
// NOTE(review): this listing is a diff rendered without +/- markers; the lines
// that use `numBlocks` and the trailing `return 0;` look like the pre-change
// counterparts of their neighbours - confirm against the real file.
void JitBlockCache::FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr)
{
blockCodePointers[block_num] = code_ptr;
JitBlock &b = blocks[block_num];
// Overwrite the emulated word at the block's start address with a marker:
// JIT_OPCODE in the top 6 bits, the block number in the low 26 bits.
// GetBlockNumberFromStartAddress decodes it the same way (code >> 26, code & 0x03FFFFFF).
Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | block_num, blocks[block_num].originalAddress);
if (block_link)
{
// Each block has up to two exits; index every valid exit address -> this block.
for (int i = 0; i < 2; i++)
{
if (b.exitAddress[i] != INVALID_EXIT)
links_to.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
}
LinkBlock(block_num);
LinkBlockExits(block_num);
}
#ifdef OPROFILE_REPORT
// Report the emitted code range to the OProfile agent under the name "EmuCode<addr>".
// NOTE(review): `emAddress` is not a parameter or local of this function as
// shown (the address is available as b.originalAddress), and blockStart is a
// u8* initialised from a `const u8*` element - this section likely does not
// compile when OPROFILE_REPORT is defined; verify.
char buf[100];
sprintf(buf, "EmuCode%x", emAddress);
u8* blockStart = blockCodePointers[numBlocks], *blockEnd = GetWritableCodePtr();
u8* blockStart = blockCodePointers[block_num], *blockEnd = GetWritableCodePtr();
op_write_native_code(agent, buf, (uint64_t)blockStart,
blockStart, blockEnd - blockStart);
#endif
numBlocks++; //commit the current block
return 0;
}
u8 **JitBlockCache::GetCodePointers()
const u8 **JitBlockCache::GetCodePointers()
{
return blockCodePointers;
}
@ -201,18 +205,18 @@ using namespace Gen;
u32 code = Memory::ReadFast32(addr);
if ((code >> 26) == JIT_OPCODE)
{
//jitted code
unsigned int blockNum = code & 0x03FFFFFF;
if (blockNum >= (unsigned int)numBlocks) {
// Jitted code.
unsigned int block = code & 0x03FFFFFF;
if (block >= (unsigned int)num_blocks) {
return -1;
}
if (blocks[blockNum].originalAddress != addr)
if (blocks[block].originalAddress != addr)
{
//_assert_msg_(DYNA_REC, 0, "GetBlockFromAddress %08x - No match - This is BAD", addr);
return -1;
}
return blockNum;
return block;
}
else
{
@ -220,6 +224,13 @@ using namespace Gen;
}
}
// Appends to *block_numbers the index of every block in [0, num_blocks) whose
// ContainsAddress(em_address) is true. Entries already in the vector are kept.
// Scans every allocated block linearly.
void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers)
{
	for (int block_num = 0; block_num < num_blocks; block_num++)
	{
		if (!blocks[block_num].ContainsAddress(em_address))
			continue;
		block_numbers->push_back(block_num);
	}
}
u32 JitBlockCache::GetOriginalCode(u32 address)
{
int num = GetBlockNumberFromStartAddress(address);
@ -312,7 +323,7 @@ using namespace Gen;
emit.MOV(32, M(&PC), Imm32(b.originalAddress));
emit.JMP(asm_routines.dispatcher, true);
emit.SetCodePtr(blockCodePointers[blocknum]);
emit.SetCodePtr((u8 *)blockCodePointers[blocknum]);
emit.MOV(32, M(&PC), Imm32(b.originalAddress));
emit.JMP(asm_routines.dispatcher, true);
}
@ -320,11 +331,11 @@ using namespace Gen;
void JitBlockCache::InvalidateCodeRange(u32 address, u32 length)
{
if (!jit->jo.enableBlocklink)
if (!jit.jo.enableBlocklink)
return;
return;
//This is slow but should be safe (zelda needs it for block linking)
for (int i = 0; i < numBlocks; i++)
for (int i = 0; i < num_blocks; i++)
{
if (RangeIntersect(blocks[i].originalAddress, blocks[i].originalAddress + blocks[i].originalSize,
address, address + length))

View File

@ -19,8 +19,10 @@
#define _JITCACHE_H
#include <map>
#include <vector>
#include "../Gekko.h"
#include "../PPCAnalyst.h"
#ifdef _WIN32
#include <windows.h>
@ -55,30 +57,29 @@ struct JitBlock
const u8 *checkedEntry;
bool invalid;
int flags;
};
class Jit64;
bool ContainsAddress(u32 em_address);
};
typedef void (*CompiledCode)();
class JitBlockCache
{
Jit64 *jit;
u8 **blockCodePointers;
const u8 **blockCodePointers;
JitBlock *blocks;
int numBlocks;
int num_blocks;
std::multimap<u32, int> links_to;
int MAX_NUM_BLOCKS;
bool RangeIntersect(int s1, int e1, int s2, int e2) const;
void LinkBlockExits(int i);
void LinkBlock(int i);
public:
JitBlockCache() {}
void SetJit(Jit64 *jit_) { jit = jit_; }
const u8* Jit(u32 emaddress);
int AllocateBlock(u32 em_address);
void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr);
void Clear();
void Init();
@ -88,20 +89,24 @@ public:
bool IsFull() const;
// Code Cache
JitBlock *GetBlock(int no);
JitBlock *GetBlock(int block_num);
int GetNumBlocks() const;
u8 **GetCodePointers();
const u8 **GetCodePointers();
// Fast way to get a block. Only works on the first ppc instruction of a block.
int GetBlockNumberFromStartAddress(u32 address);
// slower, but can get numbers from within blocks, not just the first instruction. WARNING! DOES NOT WORK WITH INLINING ENABLED (not yet a feature but will be soon)
int GetBlockNumberFromInternalAddress(u32 address);
int GetBlockNumberFromStartAddress(u32 em_address);
// slower, but can get numbers from within blocks, not just the first instruction.
// WARNING! WILL NOT WORK WITH INLINING ENABLED (not yet a feature but will be soon)
// Returns a list of block numbers - only one block can start at a particular address, but they CAN overlap.
// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers);
u32 GetOriginalCode(u32 address);
CompiledCode GetCompiledCodeFromBlock(int blockNumber);
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateCodeRange(u32 address, u32 length);
void InvalidateCodeRange(u32 em_address, u32 length);
void DestroyBlock(int blocknum, bool invalidate);
// Not currently used

View File

@ -49,7 +49,7 @@ void SingleStep()
void Run()
{
jit.EnterFastRun();
jit.Run();
}
} // namespace

View File

@ -48,6 +48,7 @@ enum
// Allocates an array of `size` PPCAnalyst::CodeOp entries with new[] and
// remembers the capacity in size_ (the value GetSize() returns).
CodeBuffer::CodeBuffer(int size)
{
codebuffer = new PPCAnalyst::CodeOp[size];
size_ = size;
}
CodeBuffer::~CodeBuffer()
@ -285,20 +286,20 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
// Does not yet perform inlining - although there are plans for that.
bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer)
{
int numCycles = 0;
u32 blockstart = address;
memset(st, 0, sizeof(st));
UGeckoInstruction previnst = Memory::Read_Instruction(address - 4);
if (previnst.hex == 0x4e800020)
{
st->isFirstBlockOfFunction = true;
}
gpa->any = true;
fpa->any = false;
int maxsize = CODEBUFFER_SIZE;
u32 blockstart = address;
int maxsize = buffer->GetSize();
int num_inst = 0;
int numFollows = 0;
int numCycles = 0;
CodeOp *code = buffer->codebuffer;
bool foundExit = false;

View File

@ -80,12 +80,16 @@ struct BlockRegStats
class CodeBuffer
{
int size_;
public:
CodeBuffer(int size);
~CodeBuffer();
int GetSize() const { return size_; }
PPCAnalyst::CodeOp *codebuffer;
int size_;
};
bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer);

View File

@ -214,7 +214,8 @@ void CJitWindow::Compare(u32 em_address)
ppc_box->SetValue(wxString::FromAscii((char*)xDis));
} else {
// hmmm
ppc_box->SetValue(wxString::FromAscii(StringFromFormat("(non-code address: %08x)", em_address).c_str()));
x86_box->SetValue(wxString::FromAscii("---"));
}