Adding better register tracking through locals and fixing branches.

LLVM does an amazing job of optimizing this. There are still plenty of
opportunities to make it better, though, such as avoiding spills where they
aren't required and only spilling/filling registers when actually needed.
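
For context, the "locals" here are the classic LLVM entry-block alloca pattern: one int64 slot per guest register, created in the entry block so the mem2reg pass (createPromoteMemoryToRegisterPass) can promote the loads/stores to SSA values. A minimal sketch of the pattern, assuming the LLVM 3.x C++ API this tree builds against (the helper name is illustrative):

```cpp
// Illustrative sketch only: one int64 alloca per guest register, placed at
// the top of the entry block so mem2reg can promote it to SSA form.
static llvm::AllocaInst* MakeRegisterLocal(llvm::Function* fn,
                                           const char* name) {
  llvm::IRBuilder<> tmp(&fn->getEntryBlock(), fn->getEntryBlock().begin());
  return tmp.CreateAlloca(tmp.getInt64Ty(), 0, name);
}
```

Emitters then CreateLoad/CreateStore against the slot; writes only reach the PPC state struct when SpillRegisters() runs at call and indirect-branch boundaries (see function_generator.cc below).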
Ben Vanik 2013-01-25 00:32:42 -08:00
parent 47481fecf7
commit dcb958de54
9 changed files with 440 additions and 219 deletions

TODO.md

@@ -43,61 +43,23 @@ indicate expected values.
 ## Codegen
-### Branch generation
-Change style to match: http://llvm.org/docs/tutorial/LangImpl5.html
-Insert check code, then push_back the branch block and implicit else after
-its generated. This ensures ordering stays legit.
-### Stack variables
-Use stack variables for registers.
-- All allocas should go in the entry block.
-- Lazily add or just add all registers/etc at the head.
-- Must be 1 el, int64
-- Reuse through function.
-- On FlushRegisters write back to state.
-- FlushRegisters on indirect branch or call.
-```
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function. This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
-                                          const std::string &VarName) {
-  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
-                   TheFunction->getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
-                           VarName.c_str());
-}
-// stash result of above and reuse
-// on first use in entry get the value from state?
-// Promote allocas to registers.
-OurFPM.add(createPromoteMemoryToRegisterPass());
-// Do simple "peephole" optimizations and bit-twiddling optzns.
-OurFPM.add(createInstructionCombiningPass());
-// Reassociate expressions.
-OurFPM.add(createReassociatePass());
-```
-### Tracing
-- Trace kernel export info (missing/present/etc).
-- Trace user call info (name/?).
-- Trace instruction info (disasm).
 ### Calling convention
 Experiment with fastcc? May need typedef fn ptrs to call into the JITted code.
-nonlazybind fn attribute to prevent lazy binding (slow down startup)
+### Function calling convention analysis
+Track functions to see if they follow the standard calling convention.
+This could use the hints from the EH data in the XEX. Looking specifically for
+stack prolog/epilog and branches to LR.
+Benefits:
+- Optimized prolog/epilog generation.
+- Local variables for stack storage (alloca/etc) instead of user memory.
+- Better return detection and fast returns.
 ### Indirect branches (ctr/lr)
-emit_control.cc XeEmitBranchTo
-Need to take the value in LR/CTR and do something with it.
 Return path:
 - In SDB see if the function follows the 'return' semantic:
   - mfspr LR / mtspr LR/CTR / bcctr -- at end?
@@ -118,32 +80,6 @@ Slow path:
 - Call out and do an SDB lookup.
 - If found, return, add to lookup table, and jump.
 - If not found, need new function codegen!
-If the indirect br looks like it may be local (no stack setup/etc?) then
-build a jump table:
-```
-Branch register with no link:
-  switch i32 %nia, label %non_local [ i32 0x..., label %loc_...
-                                      i32 0x..., label %loc_...
-                                      i32 0x..., label %loc_... ]
-  %non_local: going outside of the function
-  Could put one of these tables at the bottom of each function and share
-  it.
-  This could be done via indirectbr if branchaddress is used to stash the
-  address. The address must be within the function, though.
-Branch register with link:
-  check, never local?
-```
-### Caching of register values in basic blocks
-Right now the SSA values seem to leak from the blocks somehow. All caching
-is disabled.
 ```
 ## Debugging
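
The "Function calling convention analysis" item added above is only prose so far. A rough sketch of the scan it describes, as a hypothetical helper (the mflr/blr opcode words are standard PPC encodings; everything else here is assumed and not in the tree):

```cpp
#include <stddef.h>
#include <stdint.h>

// Hypothetical sketch: a function that saves LR in its prolog and ends in a
// plain blr likely follows the standard calling convention. XEX EH-data
// hints would refine this further.
static bool LooksLikeStandardConvention(const uint32_t* code, size_t count) {
  if (!count) return false;
  bool saves_lr = false;
  for (size_t i = 0; i < count && i < 8; ++i) {
    // mflr rD == 0x7C0802A6 with the rD field (bits 21-25) masked off.
    if ((code[i] & ~0x03E00000u) == 0x7C0802A6u) {
      saves_lr = true;
      break;
    }
  }
  return saves_lr && code[count - 1] == 0x4E800020u;  // ends with blr
}
```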

xenia/cpu/codegen/function_generator.h

@@ -44,16 +44,23 @@ public:
   void GenerateBasicBlocks();
   llvm::BasicBlock* GetBasicBlock(uint32_t address);
   llvm::BasicBlock* GetNextBasicBlock();
+  llvm::BasicBlock* GetReturnBasicBlock();
   llvm::Function* GetFunction(sdb::FunctionSymbol* fn);
+  int GenerateIndirectionBranch(uint32_t cia, llvm::Value* target,
+                                bool lk, bool likely_local);
   llvm::Value* LoadStateValue(uint32_t offset, llvm::Type* type,
                               const char* name = "");
   void StoreStateValue(uint32_t offset, llvm::Type* type, llvm::Value* value);
   llvm::Value* cia_value();
-  void FlushRegisters();
+  llvm::Value* SetupRegisterLocal(uint32_t offset, llvm::Type* type,
+                                  const char* name);
+  void FillRegisters();
+  void SpillRegisters();
   llvm::Value* xer_value();
   void update_xer_value(llvm::Value* value);
@@ -74,6 +81,7 @@ public:
   void WriteMemory(llvm::Value* addr, uint32_t size, llvm::Value* value);
 private:
+  void GenerateSharedBlocks();
   void GenerateBasicBlock(sdb::FunctionBlock* block, llvm::BasicBlock* bb);
   xe_memory_ref memory_;
@@ -83,6 +91,9 @@ private:
   llvm::Module* gen_module_;
   llvm::Function* gen_fn_;
   sdb::FunctionBlock* fn_block_;
+  llvm::BasicBlock* return_block_;
+  llvm::BasicBlock* internal_indirection_block_;
+  llvm::BasicBlock* external_indirection_block_;
   llvm::BasicBlock* bb_;
   llvm::IRBuilder<>* builder_;
@@ -92,19 +103,15 @@ private:
   uint32_t cia_;
   struct {
+    llvm::Value* indirection_target;
+    llvm::Value* indirection_cia;
     llvm::Value* xer;
-    bool xer_dirty;
     llvm::Value* lr;
-    bool lr_dirty;
     llvm::Value* ctr;
-    bool ctr_dirty;
     llvm::Value* cr;
-    bool cr_dirty;
     llvm::Value* gpr[32];
-    uint32_t gpr_dirty_bits;
-  } values_;
+  } locals_;
 };
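
In practice the new methods pair up around call boundaries. Illustrative usage only (the wrapper is hypothetical, but every call in it comes from this commit and mirrors the kTargetFunction path in emit_control.cc below):

```cpp
// Hypothetical wrapper showing the intended fill/spill protocol around a
// direct call: spill locals to state, call, then refill unless in tail
// position.
static void EmitDirectCall(FunctionGenerator& g, llvm::IRBuilder<>& b,
                           llvm::Function* target_fn, llvm::Value* state_ptr,
                           uint32_t cia) {
  g.SpillRegisters();  // write live register locals back to the state blob
  b.CreateCall2(target_fn, state_ptr, b.getInt64(cia + 4));
  llvm::BasicBlock* next_bb = g.GetNextBasicBlock();
  if (next_bb) {
    g.FillRegisters();  // reload locals the callee may have changed
    b.CreateBr(next_bb);
  } else {
    b.CreateRetVoid();  // tail position: just return, no refill needed
  }
}
```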

xenia/cpu/ppc/state.h

@@ -40,6 +40,10 @@
 // }  // FPRF
+
+#define kXEPPCRegLR  0xFFFF0001
+#define kXEPPCRegCTR 0xFFFF0002
+
 typedef struct XECACHEALIGN xe_float4 {
   union {
     struct {

(run script)

@@ -1,16 +1,15 @@
 python xenia-build.py xethunk
 python xenia-build.py build
+rm build/$1*
 ./build/xenia/release/xenia-run \
   private/$1 \
   --optimize_ir_modules=true \
   --optimize_ir_functions=false \
   --trace_kernel_calls=true \
-  --trace_user_calls=true \
+  --trace_user_calls=false \
   --trace_instructions=false \
-  2>build/run.llvm.txt 1>build/run.txt
+  1>build/run.txt
+#  2>build/run.llvm.txt \
 if [ ! -s build/run.llvm.txt ]; then
   rm build/run.llvm.txt

emit_control.cc

@@ -10,6 +10,7 @@
 #include "cpu/codegen/emit.h"
 #include <xenia/cpu/codegen/function_generator.h>
+#include <xenia/cpu/ppc/state.h>
 using namespace llvm;
@@ -23,8 +24,51 @@ namespace cpu {
 namespace codegen {
-int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src,
-                   uint32_t cia, bool lk) {
+int XeEmitIndirectBranchTo(
+    FunctionGenerator& g, IRBuilder<>& b, const char* src, uint32_t cia,
+    bool lk, uint32_t reg) {
+  // TODO(benvanik): run a DFA pass to see if we can detect whether this is
+  //     a normal function return that is pulling the LR from the stack that
+  //     it set in the prolog. If so, we can omit the dynamic check!
+
+  // NOTE: we avoid spilling registers until we know that the target is not
+  // a basic block within this function.
+
+  Value* target;
+  switch (reg) {
+    case kXEPPCRegLR:
+      target = g.lr_value();
+      break;
+    case kXEPPCRegCTR:
+      target = g.ctr_value();
+      break;
+    default:
+      XEASSERTALWAYS();
+      return 1;
+  }
+
+  // Dynamic test when branching to LR, which is usually used for the return.
+  // We only do this if LK=0 as returns wouldn't set LR.
+  // Ideally it's a return and we can just do a simple ret and be done.
+  // If it's not, we fall through to the full indirection logic.
+  if (!lk && reg == kXEPPCRegLR) {
+    BasicBlock* next_block = g.GetNextBasicBlock();
+    BasicBlock* mismatch_bb = BasicBlock::Create(*g.context(), "lr_mismatch",
+                                                 g.gen_fn(), next_block);
+    Value* lr_cmp = b.CreateICmpEQ(target, ++(g.gen_fn()->arg_begin()));
+    // The return block will spill registers for us.
+    b.CreateCondBr(lr_cmp, g.GetReturnBasicBlock(), mismatch_bb);
+    b.SetInsertPoint(mismatch_bb);
+  }
+
+  // Defer to the generator, which will do fancy things.
+  bool likely_local = !lk && reg == kXEPPCRegCTR;
+  return g.GenerateIndirectionBranch(cia, target, lk, likely_local);
+}
+
+int XeEmitBranchTo(
+    FunctionGenerator& g, IRBuilder<>& b, const char* src, uint32_t cia,
+    bool lk) {
   // Get the basic block and switch behavior based on outgoing type.
   FunctionBlock* fn_block = g.fn_block();
   switch (fn_block->outgoing_type) {
@@ -37,21 +81,24 @@ int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src,
     }
     case FunctionBlock::kTargetFunction:
     {
+      // Spill all registers to memory.
+      // TODO(benvanik): only spill ones used by the target function? Use
+      //     calling convention flags on the function to not spill temp
+      //     registers?
+      g.SpillRegisters();
       Function* target_fn = g.GetFunction(fn_block->outgoing_function);
       Function::arg_iterator args = g.gen_fn()->arg_begin();
-      Value* statePtr = args;
-      b.CreateCall(target_fn, statePtr);
-      if (!lk) {
-        // Tail.
-        b.CreateRetVoid();
-      } else {
-        BasicBlock* next_bb = g.GetNextBasicBlock();
-        if (next_bb) {
-          b.CreateBr(next_bb);
-        } else {
-          // ?
-          b.CreateRetVoid();
-        }
+      Value* state_ptr = args;
+      b.CreateCall2(target_fn, state_ptr, b.getInt64(cia + 4));
+      BasicBlock* next_bb = g.GetNextBasicBlock();
+      if (!lk || !next_bb) {
+        // Tail. No need to refill the local register values, just return.
+        b.CreateRetVoid();
+      } else {
+        // Refill registers from state.
+        g.FillRegisters();
+        b.CreateBr(next_bb);
       }
       break;
     }
@@ -59,15 +106,13 @@ int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src,
     {
       // An indirect jump.
       printf("INDIRECT JUMP VIA LR: %.8X\n", cia);
-      b.CreateRetVoid();
-      break;
+      return XeEmitIndirectBranchTo(g, b, src, cia, lk, kXEPPCRegLR);
     }
     case FunctionBlock::kTargetCTR:
     {
       // An indirect jump.
       printf("INDIRECT JUMP VIA CTR: %.8X\n", cia);
-      b.CreateRetVoid();
-      break;
+      return XeEmitIndirectBranchTo(g, b, src, cia, lk, kXEPPCRegCTR);
     }
     default:
     case FunctionBlock::kTargetNone:
@@ -95,8 +140,6 @@ XEEMITTER(bx, 0x48000000, I )(FunctionGenerator& g, IRBuilder<>& b, I
     g.update_lr_value(b.getInt32(i.address + 4));
   }
-  g.FlushRegisters();
-
   return XeEmitBranchTo(g, b, "bx", i.address, i.I.LK);
 }
@@ -113,6 +156,10 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I
   //   if LK then
   //     LR <- CIA + 4
+  // NOTE: the condition bits are reversed!
+  //       01234 (docs)
+  //       43210 (real)
+
   // TODO(benvanik): this may be wrong and overwrite LRs when not desired!
   // The docs say always, though...
   if (i.B.LK) {
@@ -120,7 +167,7 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I
   }
   Value* ctr_ok = NULL;
-  if (XESELECTBITS(i.B.BO, 4, 4)) {
+  if (XESELECTBITS(i.B.BO, 2, 2)) {
     // Ignore ctr.
   } else {
     // Decrement counter.
@@ -129,7 +176,7 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I
     ctr = b.CreateSub(ctr, b.getInt64(1));
     // Ctr check.
-    if (XESELECTBITS(i.B.BO, 3, 3)) {
+    if (XESELECTBITS(i.B.BO, 1, 1)) {
       ctr_ok = b.CreateICmpEQ(ctr, b.getInt64(0));
     } else {
       ctr_ok = b.CreateICmpNE(ctr, b.getInt64(0));
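
All of the XESELECTBITS changes in this file (4 becomes 2, 3 becomes 1) come from the gotcha the new NOTE calls out: the PPC docs number the 5-bit BO field 0..4 starting at the most significant bit, while the code indexes it LSB-first. Assuming XESELECTBITS(v, a, b) extracts bits a..b counting from the LSB, the mapping is just real_bit = 4 - docs_bit:

```cpp
// Assuming XESELECTBITS(v, a, b) extracts bits a..b counting from the LSB.
// The docs number BO 0..4 from the left (MSB), so docs bit d is real bit 4-d:
//   docs BO[2] "don't decrement CTR" -> XESELECTBITS(BO, 2, 2)
//   docs BO[3] "branch if CTR == 0"  -> XESELECTBITS(BO, 1, 1)
static inline uint32_t bo_docs_bit(uint32_t bo, uint32_t docs_bit) {
  return (bo >> (4 - docs_bit)) & 1;
}
```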
@@ -159,7 +206,6 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I
     ok = cond_ok;
   }
-  g.FlushRegisters();
   // Handle unconditional branches without extra fluff.
   BasicBlock* original_bb = b.GetInsertBlock();
   if (ok) {
@@ -196,6 +242,10 @@ XEEMITTER(bcctrx, 0x4C000420, XL )(FunctionGenerator& g, IRBuilder<>& b, I
   //   if LK then
   //     LR <- CIA + 4
+  // NOTE: the condition bits are reversed!
+  //       01234 (docs)
+  //       43210 (real)
+
   // TODO(benvanik): this may be wrong and overwrite LRs when not desired!
   // The docs say always, though...
   if (i.XL.LK) {
@@ -221,8 +271,6 @@ XEEMITTER(bcctrx, 0x4C000420, XL )(FunctionGenerator& g, IRBuilder<>& b, I
     ok = cond_ok;
   }
-  g.FlushRegisters();
-
   // Handle unconditional branches without extra fluff.
   BasicBlock* original_bb = b.GetInsertBlock();
   if (ok) {
@@ -257,6 +305,10 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I
   //   if LK then
   //     LR <- CIA + 4
+  // NOTE: the condition bits are reversed!
+  //       01234 (docs)
+  //       43210 (real)
+
   // TODO(benvanik): this may be wrong and overwrite LRs when not desired!
   // The docs say always, though...
   if (i.XL.LK) {
@@ -264,7 +316,7 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I
   }
   Value* ctr_ok = NULL;
-  if (XESELECTBITS(i.XL.BO, 4, 4)) {
+  if (XESELECTBITS(i.XL.BO, 2, 2)) {
     // Ignore ctr.
   } else {
     // Decrement counter.
@@ -273,7 +325,7 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I
     ctr = b.CreateSub(ctr, b.getInt64(1));
     // Ctr check.
-    if (XESELECTBITS(i.XL.BO, 3, 3)) {
+    if (XESELECTBITS(i.XL.BO, 1, 1)) {
       ctr_ok = b.CreateICmpEQ(ctr, b.getInt64(0));
     } else {
       ctr_ok = b.CreateICmpNE(ctr, b.getInt64(0));
@@ -303,8 +355,6 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I
     ok = cond_ok;
   }
-  g.FlushRegisters();
-
   // Handle unconditional branches without extra fluff.
   BasicBlock* original_bb = b.GetInsertBlock();
   if (ok) {

function_generator.cc

@@ -49,8 +49,22 @@ FunctionGenerator::FunctionGenerator(
   gen_module_ = gen_module;
   gen_fn_ = gen_fn;
   builder_ = new IRBuilder<>(*context_);
-  xe_zero_struct(&values_, sizeof(values_));
+  fn_block_ = NULL;
+  return_block_ = NULL;
+  internal_indirection_block_ = NULL;
+  external_indirection_block_ = NULL;
+  bb_ = NULL;
+  locals_.indirection_target = NULL;
+  locals_.indirection_cia = NULL;
+  locals_.xer = NULL;
+  locals_.lr = NULL;
+  locals_.ctr = NULL;
+  locals_.cr = NULL;
+  for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) {
+    locals_.gpr[n] = NULL;
+  }
 }

 FunctionGenerator::~FunctionGenerator() {
@@ -87,12 +101,13 @@ void FunctionGenerator::GenerateBasicBlocks() {
   builder_->SetInsertPoint(entry);
   if (FLAGS_trace_user_calls) {
+    SpillRegisters();
     Value* traceUserCall = gen_module_->getGlobalVariable("XeTraceUserCall");
     builder_->CreateCall3(
         traceUserCall,
         gen_fn_->arg_begin(),
-        builder_->getInt32(fn_->start_address),
-        builder_->getInt32(0));
+        builder_->getInt64(fn_->start_address),
+        ++gen_fn_->arg_begin());
   }

   // If this function is empty, abort!
@@ -101,6 +116,11 @@ void FunctionGenerator::GenerateBasicBlocks() {
     return;
   }

+  // Create a return block.
+  // This spills registers and returns. All non-tail returns should branch
+  // here to do the return and ensure registers are spilled.
+  return_block_ = BasicBlock::Create(*context_, "return", gen_fn_);
+
   // Pass 1 creates all of the blocks - this way we can branch to them.
   for (std::map<uint32_t, FunctionBlock*>::iterator it = fn_->blocks.begin();
        it != fn_->blocks.end(); ++it) {
@@ -122,6 +142,50 @@ void FunctionGenerator::GenerateBasicBlocks() {
     FunctionBlock* block = it->second;
     GenerateBasicBlock(block, GetBasicBlock(block->start_address));
   }
+
+  // Setup the shared return/indirection/etc blocks now that we know all the
+  // blocks we need and all the registers used.
+  GenerateSharedBlocks();
+}
+
+void FunctionGenerator::GenerateSharedBlocks() {
+  IRBuilder<>& b = *builder_;
+
+  Value* indirect_branch = gen_module_->getGlobalVariable("XeIndirectBranch");
+
+  // Setup the spill block in return.
+  b.SetInsertPoint(return_block_);
+  SpillRegisters();
+  b.CreateRetVoid();
+
+  // Build indirection block on demand.
+  // We have already prepped all basic blocks, so we can build these tables now.
+  if (external_indirection_block_) {
+    // This will spill registers and call the external function.
+    // It is only meant for LK=0.
+    b.SetInsertPoint(external_indirection_block_);
+    SpillRegisters();
+    b.CreateCall3(indirect_branch,
+                  gen_fn_->arg_begin(),
+                  b.CreateLoad(locals_.indirection_target),
+                  b.CreateLoad(locals_.indirection_cia));
+    b.CreateRetVoid();
+  }
+
+  if (internal_indirection_block_) {
+    // This will not spill registers and instead try to switch on local blocks.
+    // If it fails then the external indirection path is taken.
+    // NOTE: we only generate this if a likely local branch is taken.
+    b.SetInsertPoint(internal_indirection_block_);
+    SwitchInst* switch_i = b.CreateSwitch(
+        b.CreateLoad(locals_.indirection_target),
+        external_indirection_block_,
+        bbs_.size());
+    for (std::map<uint32_t, BasicBlock*>::iterator it = bbs_.begin();
+         it != bbs_.end(); ++it) {
+      switch_i->addCase(b.getInt64(it->first), it->second);
+    }
+  }
 }

 void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block,
@@ -147,6 +211,7 @@ void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block,
     i.type = ppc::GetInstrType(i.code);

     if (FLAGS_trace_instructions) {
+      SpillRegisters();
       builder_->CreateCall3(
           traceInstruction,
           gen_fn_->arg_begin(),
@@ -176,10 +241,6 @@ void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block,
   // If we fall through, create the branch.
   if (block->outgoing_type == FunctionBlock::kTargetNone) {
-    // Flush registers.
-    // TODO(benvanik): only do this before jumps out.
-    FlushRegisters();
-
     BasicBlock* next_bb = GetNextBasicBlock();
     XEASSERTNOTNULL(next_bb);
     builder_->CreateBr(next_bb);
@@ -212,6 +273,10 @@ BasicBlock* FunctionGenerator::GetNextBasicBlock() {
   return NULL;
 }

+BasicBlock* FunctionGenerator::GetReturnBasicBlock() {
+  return return_block_;
+}
+
 Function* FunctionGenerator::GetFunction(FunctionSymbol* fn) {
   Function* result = gen_module_->getFunction(StringRef(fn->name));
   if (!result) {
@@ -221,6 +286,94 @@ Function* FunctionGenerator::GetFunction(FunctionSymbol* fn) {
   return result;
 }

+int FunctionGenerator::GenerateIndirectionBranch(uint32_t cia, Value* target,
+                                                 bool lk, bool likely_local) {
+  // This function is called by the control emitters when they know that an
+  // indirect branch is required.
+  // It first tries to see if the branch is to an address within the function
+  // and, if so, uses a local switch table. If that fails because we don't know
+  // the block the function is regenerated (ACK!). If the target is external
+  // then an external call occurs.
+
+  IRBuilder<>& b = *builder_;
+  BasicBlock* next_block = GetNextBasicBlock();
+
+  BasicBlock* insert_bb = b.GetInsertBlock();
+  BasicBlock::iterator insert_bbi = b.GetInsertPoint();
+
+  // Request builds of the indirection blocks on demand.
+  // We can't build here because we don't know what registers will be needed
+  // yet, so we just create the blocks and let GenerateSharedBlocks handle it
+  // after we are done with all user instructions.
+  if (!external_indirection_block_) {
+    // Setup locals in the entry block.
+    builder_->SetInsertPoint(&gen_fn_->getEntryBlock(),
+                             gen_fn_->getEntryBlock().begin());
+    locals_.indirection_target = b.CreateAlloca(
+        b.getInt64Ty(), 0, "indirection_target");
+    locals_.indirection_cia = b.CreateAlloca(
+        b.getInt64Ty(), 0, "indirection_cia");
+
+    external_indirection_block_ = BasicBlock::Create(
+        *context_, "external_indirection_block", gen_fn_, return_block_);
+  }
+  if (likely_local && !internal_indirection_block_) {
+    internal_indirection_block_ = BasicBlock::Create(
+        *context_, "internal_indirection_block", gen_fn_, return_block_);
+  }
+  b.SetInsertPoint(insert_bb, insert_bbi);
+
+  // Check to see if the target address is within the function.
+  // If it is jump to that basic block. If the basic block is not found it means
+  // we have a jump inside the function that wasn't identified via static
+  // analysis. These are bad as they require function regeneration.
+  if (likely_local) {
+    // Note that we only support LK=0, as we are using shared tables.
+    XEASSERT(!lk);
+    b.CreateStore(target, locals_.indirection_target);
+    b.CreateStore(b.getInt64(cia), locals_.indirection_cia);
+    Value* fn_ge_cmp = b.CreateICmpUGE(target, b.getInt64(fn_->start_address));
+    Value* fn_l_cmp = b.CreateICmpULT(target, b.getInt64(fn_->end_address));
+    Value* fn_target_cmp = b.CreateAnd(fn_ge_cmp, fn_l_cmp);
+    b.CreateCondBr(fn_target_cmp,
+                   internal_indirection_block_, external_indirection_block_);
+    return 0;
+  }
+
+  // If we are LK=0 jump to the shared indirection block. This prevents us
+  // from needing to fill the registers again after the call and shares more
+  // code.
+  if (!lk) {
+    b.CreateStore(target, locals_.indirection_target);
+    b.CreateStore(b.getInt64(cia), locals_.indirection_cia);
+    b.CreateBr(external_indirection_block_);
+  } else {
+    // Slowest path - spill, call the external function, and fill.
+    // We should avoid this at all costs.
+
+    // Spill registers. We could probably share this.
+    SpillRegisters();
+
+    // TODO(benvanik): keep function pointer lookup local.
+    Value* indirect_branch = gen_module_->getGlobalVariable("XeIndirectBranch");
+    b.CreateCall3(indirect_branch,
+                  gen_fn_->arg_begin(),
+                  target,
+                  b.getInt64(cia));
+
+    if (next_block) {
+      // Only refill if not a tail call.
+      FillRegisters();
+      b.CreateBr(next_block);
+    } else {
+      b.CreateRetVoid();
+    }
+  }
+  return 0;
+}
+
 Value* FunctionGenerator::LoadStateValue(uint32_t offset, Type* type,
                                          const char* name) {
   PointerType* pointerTy = PointerType::getUnqual(type);
@@ -240,12 +393,6 @@ void FunctionGenerator::StoreStateValue(uint32_t offset, Type* type,
   Value* address = builder_->CreateConstInBoundsGEP1_64(
       statePtr, offset);
   Value* ptr = builder_->CreatePointerCast(address, pointerTy);
-
-  // Widen to target type if needed.
-  if (!value->getType()->isIntegerTy(type->getIntegerBitWidth())) {
-    value = builder_->CreateZExt(value, type);
-  }
-
   builder_->CreateStore(value, ptr);
 }
@@ -253,184 +400,225 @@ Value* FunctionGenerator::cia_value() {
   return builder_->getInt32(cia_);
 }

-void FunctionGenerator::FlushRegisters() {
+Value* FunctionGenerator::SetupRegisterLocal(uint32_t offset, llvm::Type* type,
+                                             const char* name) {
+  // Insert into the entry block.
+  BasicBlock* insert_bb = builder_->GetInsertBlock();
+  BasicBlock::iterator insert_bbi = builder_->GetInsertPoint();
+  builder_->SetInsertPoint(&gen_fn_->getEntryBlock(),
+                           gen_fn_->getEntryBlock().begin());
+
+  Value* v = builder_->CreateAlloca(type, 0, name);
+  builder_->CreateStore(LoadStateValue(offset, type), v);
+
+  builder_->SetInsertPoint(insert_bb, insert_bbi);
+  return v;
+}
+
+void FunctionGenerator::FillRegisters() {
+  // This updates all of the local register values from the state memory.
+  // It should be called on function entry for initial setup and after any
+  // calls that may modify the registers.
+  if (locals_.xer) {
+    builder_->CreateStore(LoadStateValue(
+        offsetof(xe_ppc_state_t, xer),
+        builder_->getInt64Ty()), locals_.xer);
+  }
+  if (locals_.lr) {
+    builder_->CreateStore(LoadStateValue(
+        offsetof(xe_ppc_state_t, lr),
+        builder_->getInt64Ty()), locals_.lr);
+  }
+  if (locals_.ctr) {
+    builder_->CreateStore(LoadStateValue(
+        offsetof(xe_ppc_state_t, ctr),
+        builder_->getInt64Ty()), locals_.ctr);
+  }
+  if (locals_.cr) {
+    builder_->CreateStore(LoadStateValue(
+        offsetof(xe_ppc_state_t, cr),
+        builder_->getInt64Ty()), locals_.cr);
+  }
+  // Note that we skip zero.
+  for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) {
+    if (locals_.gpr[n]) {
+      builder_->CreateStore(LoadStateValue(
+          offsetof(xe_ppc_state_t, r) + 8 * n,
+          builder_->getInt64Ty()), locals_.gpr[n]);
+    }
+  }
+}
+
+void FunctionGenerator::SpillRegisters() {
   // This flushes all local registers (if written) to the register bank and
   // resets their values.
   //
   // TODO(benvanik): only flush if actually required, or selective flushes.
-  // xer
+  if (locals_.xer) {
+    StoreStateValue(
+        offsetof(xe_ppc_state_t, xer),
+        builder_->getInt64Ty(),
+        builder_->CreateLoad(locals_.xer));
+  }

-  if (values_.lr && values_.lr_dirty) {
+  if (locals_.lr) {
     StoreStateValue(
         offsetof(xe_ppc_state_t, lr),
         builder_->getInt64Ty(),
-        values_.lr);
-    values_.lr = NULL;
-    values_.lr_dirty = false;
+        builder_->CreateLoad(locals_.lr));
   }

-  if (values_.ctr && values_.ctr_dirty) {
+  if (locals_.ctr) {
     StoreStateValue(
         offsetof(xe_ppc_state_t, ctr),
         builder_->getInt64Ty(),
-        values_.ctr);
-    values_.ctr = NULL;
-    values_.ctr_dirty = false;
+        builder_->CreateLoad(locals_.ctr));
   }

   // TODO(benvanik): don't flush across calls?
-  if (values_.cr && values_.cr_dirty) {
+  if (locals_.cr) {
     StoreStateValue(
         offsetof(xe_ppc_state_t, cr),
         builder_->getInt64Ty(),
-        values_.cr);
-    values_.cr = NULL;
-    values_.cr_dirty = false;
+        builder_->CreateLoad(locals_.cr));
   }

-  for (uint32_t n = 0; n < XECOUNT(values_.gpr); n++) {
-    Value* v = values_.gpr[n];
-    if (v && (values_.gpr_dirty_bits & (1 << n))) {
+  // Note that we skip zero.
+  for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) {
+    Value* v = locals_.gpr[n];
+    if (v) {
       StoreStateValue(
           offsetof(xe_ppc_state_t, r) + 8 * n,
           builder_->getInt64Ty(),
-          values_.gpr[n]);
-      values_.gpr[n] = NULL;
+          builder_->CreateLoad(locals_.gpr[n]));
     }
   }
-  values_.gpr_dirty_bits = 0;
 }
 Value* FunctionGenerator::xer_value() {
-  if (true) {//!values_.xer) {
-    // Fetch from register bank.
-    Value* v = LoadStateValue(
+  if (!locals_.xer) {
+    locals_.xer = SetupRegisterLocal(
         offsetof(xe_ppc_state_t, xer),
         builder_->getInt64Ty(),
-        "xer_");
-    values_.xer = v;
-    return v;
-  } else {
-    // Return local.
-    return values_.xer;
+        "xer");
   }
+  return locals_.xer;
 }

 void FunctionGenerator::update_xer_value(Value* value) {
-  // Widen to 64bits if needed.
+  // Ensure the register is local.
+  xer_value();
+  // Extend to 64bits if needed.
   if (!value->getType()->isIntegerTy(64)) {
     value = builder_->CreateZExt(value, builder_->getInt64Ty());
   }
-  values_.xer = value;
-  values_.xer_dirty = true;
+  builder_->CreateStore(value, locals_.xer);
 }

 Value* FunctionGenerator::lr_value() {
-  if (true) {//!values_.lr) {
-    // Fetch from register bank.
-    Value* v = LoadStateValue(
+  if (!locals_.lr) {
+    locals_.lr = SetupRegisterLocal(
         offsetof(xe_ppc_state_t, lr),
         builder_->getInt64Ty(),
-        "lr_");
-    values_.lr = v;
-    return v;
-  } else {
-    // Return local.
-    return values_.lr;
+        "lr");
   }
+  return builder_->CreateLoad(locals_.lr);
 }

 void FunctionGenerator::update_lr_value(Value* value) {
-  // Widen to 64bits if needed.
+  // Ensure the register is local.
+  lr_value();
+  // Extend to 64bits if needed.
   if (!value->getType()->isIntegerTy(64)) {
     value = builder_->CreateZExt(value, builder_->getInt64Ty());
   }
-  values_.lr = value;
-  values_.lr_dirty = true;
+  builder_->CreateStore(value, locals_.lr);
 }

 Value* FunctionGenerator::ctr_value() {
-  if (true) {//!values_.ctr) {
-    // Fetch from register bank.
-    Value* v = LoadStateValue(
+  if (!locals_.ctr) {
+    locals_.ctr = SetupRegisterLocal(
         offsetof(xe_ppc_state_t, ctr),
         builder_->getInt64Ty(),
-        "ctr_");
-    values_.ctr = v;
-    return v;
-  } else {
-    // Return local.
-    return values_.ctr;
+        "ctr");
   }
+  return builder_->CreateLoad(locals_.ctr);
 }

 void FunctionGenerator::update_ctr_value(Value* value) {
-  // Widen to 64bits if needed.
+  // Ensure the register is local.
+  ctr_value();
+  // Extend to 64bits if needed.
   if (!value->getType()->isIntegerTy(64)) {
     value = builder_->CreateZExt(value, builder_->getInt64Ty());
   }
-  values_.ctr = value;
-  values_.ctr_dirty = true;
+  builder_->CreateStore(value, locals_.ctr);
 }

 Value* FunctionGenerator::cr_value() {
-  if (true) {//!values_.cr) {
-    // Fetch from register bank.
-    Value* v = LoadStateValue(
+  if (!locals_.cr) {
+    locals_.cr = SetupRegisterLocal(
         offsetof(xe_ppc_state_t, cr),
         builder_->getInt64Ty(),
-        "cr_");
-    values_.cr = v;
-    return v;
-  } else {
-    // Return local.
-    return values_.cr;
+        "cr");
   }
+  return builder_->CreateLoad(locals_.cr);
 }

 void FunctionGenerator::update_cr_value(Value* value) {
-  values_.cr = value;
-  values_.cr_dirty = true;
+  // Ensure the register is local.
+  cr_value();
+  // Extend to 64bits if needed.
+  if (!value->getType()->isIntegerTy(64)) {
+    value = builder_->CreateZExt(value, builder_->getInt64Ty());
+  }
+  builder_->CreateStore(value, locals_.cr);
 }

 Value* FunctionGenerator::gpr_value(uint32_t n) {
+  XEASSERT(n >= 0 && n < 32);
   if (n == 0) {
     // Always force zero to a constant - this should help LLVM.
     return builder_->getInt64(0);
   }
-  if (true) {//!values_.gpr[n]) {
-    // Need to fetch from register bank.
+  if (!locals_.gpr[n]) {
     char name[30];
-    xesnprintfa(name, XECOUNT(name), "gpr_r%d_", n);
-    Value* v = LoadStateValue(
+    xesnprintfa(name, XECOUNT(name), "gpr_r%d", n);
+    locals_.gpr[n] = SetupRegisterLocal(
         offsetof(xe_ppc_state_t, r) + 8 * n,
         builder_->getInt64Ty(),
         name);
-    values_.gpr[n] = v;
-    return v;
-  } else {
-    // Local value, reuse.
-    return values_.gpr[n];
   }
+  return builder_->CreateLoad(locals_.gpr[n]);
 }

 void FunctionGenerator::update_gpr_value(uint32_t n, Value* value) {
+  XEASSERT(n >= 0 && n < 32);
   if (n == 0) {
     // Ignore writes to zero.
     return;
   }
-  // Widen to 64bits if needed.
+  // Ensure the register is local.
+  gpr_value(n);
+  // Extend to 64bits if needed.
   if (!value->getType()->isIntegerTy(64)) {
     value = builder_->CreateZExt(value, builder_->getInt64Ty());
   }
-  values_.gpr[n] = value;
-  values_.gpr_dirty_bits |= 1 << n;
+  builder_->CreateStore(value, locals_.gpr[n]);
 }

 Value* FunctionGenerator::GetMembase() {

module_generator.cc

@@ -138,6 +138,7 @@ Function* ModuleGenerator::CreateFunctionDefinition(const char* name) {
   std::vector<Type*> args;
   args.push_back(PointerType::getUnqual(Type::getInt8Ty(context)));
+  args.push_back(Type::getInt64Ty(context));
   Type* return_type = Type::getVoidTy(context);
   FunctionType* ft = FunctionType::get(return_type,
@@ -159,11 +160,16 @@ Function* ModuleGenerator::CreateFunctionDefinition(const char* name) {
   Value* fn_arg = fn_args++;
   fn_arg->setName("state");
   f->setDoesNotAlias(1);
+  f->setDoesNotCapture(1);
   // 'state' should try to be in a register, if possible.
   // TODO(benvanik): verify that's a good idea.
   // f->getArgumentList().begin()->addAttr(
   //     Attribute::get(context, AttrBuilder().addAttribute(Attribute::InReg)));
+
+  // 'lr'
+  fn_arg = fn_args++;
+  fn_arg->setName("lr");
+
   return f;
 };
@@ -183,8 +189,8 @@ void ModuleGenerator::AddMissingImport(FunctionSymbol* fn) {
     builder.CreateCall3(
         traceKernelCall,
         f->arg_begin(),
-        builder.getInt32(fn->start_address),
-        builder.getInt32(0));
+        builder.getInt64(fn->start_address),
+        ++f->arg_begin());
   }
   builder.CreateRetVoid();
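
Taken together with the emit_control.cc changes, every generated function now receives the return address as a second argument, so a blr whose LR matches it can take the fast return path. The emitted functions have roughly this shape (the typedef name is hypothetical):

```cpp
// Shape of the generated functions after this commit (typedef name assumed).
// 'state' is the guest register/memory state blob; 'lr' is the return
// address the caller passed (cia + 4), which XeEmitIndirectBranchTo compares
// against a blr target to turn a matching return into a plain ret.
typedef void (*xe_generated_fn_t)(uint8_t* state, uint64_t lr);
```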

exec_module.cc

@@ -202,14 +202,19 @@ XECLEANUP:
   return result_code;
 }

-void XeTraceKernelCall(xe_ppc_state_t* state, uint32_t cia, uint32_t call_ia) {
+void XeIndirectBranch(xe_ppc_state_t* state, uint64_t target, uint64_t br_ia) {
+  printf("INDIRECT BRANCH %.8X -> %.8X\n", (uint32_t)br_ia, (uint32_t)target);
+  XEASSERTALWAYS();
+}
+
+void XeTraceKernelCall(xe_ppc_state_t* state, uint64_t cia, uint64_t call_ia) {
   // TODO(benvanik): get names
-  XELOGCPU("TRACE: %.8X -> k.%.8X", call_ia, cia);
+  XELOGCPU("TRACE: %.8X -> k.%.8X", (uint32_t)call_ia, (uint32_t)cia);
 }

-void XeTraceUserCall(xe_ppc_state_t* state, uint32_t cia, uint32_t call_ia) {
+void XeTraceUserCall(xe_ppc_state_t* state, uint64_t cia, uint64_t call_ia) {
   // TODO(benvanik): get names
-  XELOGCPU("TRACE: %.8X -> u.%.8X", call_ia, cia);
+  XELOGCPU("TRACE: %.8X -> u.%.8X", (uint32_t)call_ia, (uint32_t)cia);
 }

 void XeTraceInstruction(xe_ppc_state_t* state, uint32_t cia, uint32_t data) {
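
XeIndirectBranch itself is still a stub that logs and asserts. Per the TODO.md notes above, the eventual slow path would be an SDB lookup with codegen on a miss; hypothetically (LookupOrGenerateFunction is an assumed helper, not in the tree):

```cpp
// Hypothetical slow path for XeIndirectBranch: resolve the target via the
// SDB (generating code on a miss) and jump into it. Nothing here exists yet.
void XeIndirectBranch(xe_ppc_state_t* state, uint64_t target, uint64_t br_ia) {
  typedef void (*xe_generated_fn_t)(uint8_t* state, uint64_t lr);
  xe_generated_fn_t fn = LookupOrGenerateFunction((uint32_t)target);
  XEASSERTNOTNULL(fn);
  fn((uint8_t*)state, state->lr);  // forward the guest LR so returns work
}
```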
@@ -244,11 +249,23 @@ int ExecModule::InjectGlobals() {
       ConstantInt::get(intPtrTy, (uintptr_t)xe_memory_addr(memory_, 0)),
       int8PtrTy));

+  // Control methods:
+  std::vector<Type*> indirectBranchArgs;
+  indirectBranchArgs.push_back(int8PtrTy);
+  indirectBranchArgs.push_back(Type::getInt64Ty(context));
+  indirectBranchArgs.push_back(Type::getInt64Ty(context));
+  FunctionType* indirectBranchTy = FunctionType::get(
+      Type::getVoidTy(context), indirectBranchArgs, false);
+  gv = new GlobalVariable(*gen_module_, indirectBranchTy, true,
+                          GlobalVariable::ExternalLinkage, 0,
+                          "XeIndirectBranch");
+  engine_->addGlobalMapping(gv, (void*)&XeIndirectBranch);
+
   // Tracing methods:
   std::vector<Type*> traceCallArgs;
   traceCallArgs.push_back(int8PtrTy);
-  traceCallArgs.push_back(Type::getInt32Ty(context));
-  traceCallArgs.push_back(Type::getInt32Ty(context));
+  traceCallArgs.push_back(Type::getInt64Ty(context));
+  traceCallArgs.push_back(Type::getInt64Ty(context));
   FunctionType* traceCallTy = FunctionType::get(
       Type::getVoidTy(context), traceCallArgs, false);
   std::vector<Type*> traceInstructionArgs;

sdb.cc

@@ -588,6 +588,20 @@ int SymbolDatabase::AnalyzeFunction(FunctionSymbol* fn) {
         ends_fn = true;
       }
       ends_block = true;
+    } else if (i.code == 0x4E800420) {
+      // bctr -- unconditional branch to CTR.
+      // This is generally a jump to a function pointer (non-return).
+      block->outgoing_type = FunctionBlock::kTargetCTR;
+      if (furthest_target > addr) {
+        // Remaining targets within function, not end.
+        XELOGSDB("ignoring bctr %.8X (branch to %.8X)\n", addr,
+                 furthest_target);
+      } else {
+        // Function end point.
+        XELOGSDB("function end %.8X\n", addr);
+        ends_fn = true;
+      }
+      ends_block = true;
     } else if (i.type->opcode == 0x48000000) {
       // b/ba/bl/bla
       uint32_t target = XEEXTS26(i.I.LI << 2) + (i.I.AA ? 0 : (int32_t)addr);