A probably-working register allocator.
This commit is contained in:
parent
6bd214af0b
commit
4a584129d2
|
@ -74,15 +74,19 @@ int IVMAssembler::Assemble(
|
|||
builder->ResetLabelTags();
|
||||
|
||||
// Function prologue.
|
||||
size_t stack_size = 0;
|
||||
size_t stack_offset = 0;
|
||||
auto locals = builder->locals();
|
||||
for (auto it = locals.begin(); it != locals.end(); ++it) {
|
||||
auto slot = *it;
|
||||
size_t stack_offset = stack_size;
|
||||
size_t type_size = GetTypeSize(slot->type);
|
||||
// Align to natural size.
|
||||
stack_offset = XEALIGN(stack_offset, type_size);
|
||||
slot->set_constant(stack_offset);
|
||||
stack_size += GetTypeSize(slot->type);
|
||||
stack_offset += type_size;
|
||||
}
|
||||
ctx.stack_size = stack_size;
|
||||
// Ensure 16b alignment.
|
||||
stack_offset = XEALIGN(stack_offset, 16);
|
||||
ctx.stack_size = stack_offset;
|
||||
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
|
|
|
@ -38,14 +38,14 @@ int IVMBackend::Initialize() {
|
|||
0,
|
||||
"gpr",
|
||||
MachineInfo::RegisterSet::INT_TYPES,
|
||||
10,
|
||||
6,
|
||||
};
|
||||
machine_info_.register_sets[1] = {
|
||||
1,
|
||||
"vec",
|
||||
MachineInfo::RegisterSet::FLOAT_TYPES |
|
||||
MachineInfo::RegisterSet::VEC_TYPES,
|
||||
10,
|
||||
6,
|
||||
};
|
||||
|
||||
alloy::tracing::WriteEvent(EventType::Init({
|
||||
|
|
|
@ -54,7 +54,7 @@ int X64Emitter::Initialize() {
|
|||
}
|
||||
|
||||
int X64Emitter::Emit(
|
||||
HIRBuilder* builder,
|
||||
HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size) {
|
||||
// Reset.
|
||||
|
@ -98,8 +98,6 @@ void* X64Emitter::Emplace(size_t stack_size) {
|
|||
return new_address;
|
||||
}
|
||||
|
||||
// Rounds `value` up to the next multiple of `align`; `align` must be a power
// of two. Both arguments are parenthesized so that expression arguments
// (e.g. `1 << 4`) expand with the intended precedence. Each argument is
// evaluated more than once, so avoid side effects in them.
#define XEALIGN(value, align) (((value) + (align) - 1) & ~((align) - 1))
|
||||
|
||||
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||
// These are the registers we will not be using. All others are fair game.
|
||||
const uint32_t reserved_regs =
|
||||
|
@ -220,7 +218,7 @@ void X64Emitter::ResetRegisters(uint32_t reserved_regs) {
|
|||
if (live_regs & 0x1) {
|
||||
auto v = reg_state_.reg_values[n];
|
||||
if (v) {
|
||||
v->reg = -1;
|
||||
v->reg.index = -1;
|
||||
}
|
||||
}
|
||||
reg_state_.reg_values[n] = 0;
|
||||
|
|
|
@ -15,8 +15,9 @@
|
|||
#include <alloy/compiler/passes/context_promotion_pass.h>
|
||||
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
|
||||
#include <alloy/compiler/passes/dead_code_elimination_pass.h>
|
||||
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
|
||||
#include <alloy/compiler/passes/finalization_pass.h>
|
||||
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
|
||||
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||
#include <alloy/compiler/passes/simplification_pass.h>
|
||||
#include <alloy/compiler/passes/validation_pass.h>
|
||||
#include <alloy/compiler/passes/value_reduction_pass.h>
|
||||
|
@ -137,5 +138,42 @@
|
|||
// store_context +302, v5
|
||||
// branch_true v5, ...
|
||||
//
|
||||
// - X86Canonicalization
|
||||
// For various opcodes add copies/commute the arguments to match x86
|
||||
// operand semantics. This makes code generation easier and if done
|
||||
// before register allocation can prevent a lot of extra shuffling in
|
||||
// the emitted code.
|
||||
//
|
||||
// Example:
|
||||
// <block0>:
|
||||
// v0 = ...
|
||||
// v1 = ...
|
||||
// v2 = add v0, v1 <-- v1 now unused
|
||||
// Becomes:
|
||||
// v0 = ...
|
||||
// v1 = ...
|
||||
// v1 = add v1, v0 <-- src1 = dest/src, so reuse for both
|
||||
// by commuting and setting dest = src1
|
||||
//
|
||||
// - RegisterAllocation
|
||||
// Given a machine description (register classes, counts) run over values
|
||||
// and assign them to registers, adding spills as needed. It should be
|
||||
// possible to directly emit code from this form.
|
||||
//
|
||||
// Example:
|
||||
// <block0>:
|
||||
// v0 = load_context +0
|
||||
// v1 = load_context +1
|
||||
// v0 = add v0, v1
|
||||
// ...
|
||||
// v2 = mul v0, v1
|
||||
// Becomes:
|
||||
// reg0 = load_context +0
|
||||
// reg1 = load_context +1
|
||||
// reg2 = add reg0, reg1
|
||||
// store_local +123, reg2 <-- spill inserted
|
||||
// ...
|
||||
// reg0 = load_local +123 <-- load inserted
|
||||
// reg0 = mul reg0, reg1
|
||||
|
||||
#endif // ALLOY_COMPILER_COMPILER_PASSES_H_
|
||||
|
|
|
@ -41,19 +41,21 @@ int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
|
|||
// Add edges.
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
auto instr = block->instr_head;
|
||||
auto instr = block->instr_tail;
|
||||
while (instr) {
|
||||
if (instr->opcode->flags & OPCODE_FLAG_BRANCH) {
|
||||
if (instr->opcode == &OPCODE_BRANCH_info) {
|
||||
auto label = instr->src1.label;
|
||||
builder->AddEdge(block, label->block, Edge::UNCONDITIONAL);
|
||||
break;
|
||||
} else if (instr->opcode == &OPCODE_BRANCH_TRUE_info ||
|
||||
instr->opcode == &OPCODE_BRANCH_FALSE_info) {
|
||||
auto label = instr->src2.label;
|
||||
builder->AddEdge(block, label->block, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
instr = instr->next;
|
||||
instr = instr->prev;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,471 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
using namespace alloy::hir;
|
||||
|
||||
|
||||
// Live range of a single value, expressed in global instruction ordinals.
// An interval is threaded onto at most one of the allocator's doubly-linked
// lists (unhandled/active/handled) at a time through next/prev.
struct RegisterAllocationPass::Interval {
  // First instruction ordinal covered by the interval.
  uint32_t start_ordinal;
  // Last instruction ordinal covered by the interval.
  uint32_t end_ordinal;
  // Value whose lifetime this interval describes.
  Value* value;
  // Register set the value must be allocated from.
  RegisterFreeUntilSet* free_until_set;
  // TODO(benvanik): reduce to offsets in arena?
  struct Interval* next;
  struct Interval* prev;

  // Pushes this interval onto the front of the list rooted at *list_head.
  // The interval must not currently be a member of any list.
  void AddToList(Interval** list_head) {
    auto list_next = *list_head;
    // Clear prev explicitly so a stale link can never leak into the list.
    this->prev = NULL;
    this->next = list_next;
    if (list_next) {
      list_next->prev = this;
    }
    *list_head = this;
  }

  // Inserts this interval into a list kept sorted by ascending start_ordinal.
  // It is placed before the first entry that starts strictly later; when no
  // such entry exists it is appended at the tail. An empty list is handled
  // by making this interval the head (previously the interval was silently
  // dropped in that case).
  void InsertIntoList(Interval** list_head) {
    Interval* tail = NULL;
    for (auto it = *list_head; it; it = it->next) {
      if (it->start_ordinal > this->start_ordinal) {
        // Went too far. Insert before this interval.
        this->prev = it->prev;
        this->next = it;
        if (it->prev) {
          it->prev->next = this;
        } else {
          *list_head = this;
        }
        it->prev = this;
        return;
      }
      tail = it;
    }

    // Nothing starts later than us: become the new tail (or the sole entry).
    this->prev = tail;
    this->next = NULL;
    if (tail) {
      tail->next = this;
    } else {
      *list_head = this;
    }
  }

  // Unlinks this interval from the list rooted at *list_head and clears its
  // links so it can be added to another list.
  void RemoveFromList(Interval** list_head) {
    if (this->next) {
      this->next->prev = this->prev;
    }
    if (this->prev) {
      this->prev->next = this->next;
    } else {
      *list_head = this->next;
    }
    this->next = this->prev = NULL;
  }
};
|
||||
|
||||
// Heads of the three work lists used while scanning. Each list is linked
// through Interval::next/prev; an empty list is represented by NULL.
struct RegisterAllocationPass::Intervals {
  // Intervals that still need a register, consumed from the head.
  Interval* unhandled;
  // Intervals that currently hold a register.
  Interval* active;
  // Intervals that have ended or have been spilled.
  Interval* handled;
};
|
||||
|
||||
// Builds one RegisterFreeUntilSet per register set described by
// machine_info and records which set serves int, float and vector values.
//
// machine_info->register_sets is walked until the first entry with a zero
// count. The walk is additionally capped at the capacity of
// free_until_sets_.all_sets so that a machine description with more sets
// than we can track cannot write past the end of that array.
RegisterAllocationPass::RegisterAllocationPass(
    const MachineInfo* machine_info) :
    CompilerPass(),
    machine_info_(machine_info) {
  // Initialize register sets. The values of these will be
  // cleared before use, so just the structure is required.
  xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_));
  auto mi_sets = machine_info->register_sets;
  const size_t max_sets = XECOUNT(free_until_sets_.all_sets);
  for (size_t n = 0; n < max_sets && mi_sets[n].count; n++) {
    auto& mi_set = mi_sets[n];
    auto free_until_set = new RegisterFreeUntilSet();
    free_until_sets_.all_sets[n] = free_until_set;
    // pos[] has a fixed capacity; a larger register set would overflow it
    // when the allocator resets/marks registers.
    XEASSERT(mi_set.count <= XECOUNT(free_until_set->pos));
    free_until_set->count = mi_set.count;
    free_until_set->set = &mi_set;
    // A single machine set may serve several value classes (for example
    // floats and vectors sharing one register file).
    if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
      free_until_sets_.int_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
      free_until_sets_.float_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
      free_until_sets_.vec_set = free_until_set;
    }
  }
}
|
||||
|
||||
// Releases the per-set tracking structures allocated by the constructor.
RegisterAllocationPass::~RegisterAllocationPass() {
  // Sets are stored contiguously from index 0, so the first empty slot
  // marks the end of the populated entries.
  for (auto free_until_set : free_until_sets_.all_sets) {
    if (!free_until_set) {
      break;
    }
    delete free_until_set;
  }
}
|
||||
|
||||
// Assigns a register to every value defined in the function, inserting
// local spills/reloads through AllocateBlockedReg when a set runs out.
// Always returns 0.
int RegisterAllocationPass::Run(HIRBuilder* builder) {
  // A (probably broken) implementation of a linear scan register allocator
  // that operates directly on SSA form:
  // http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf
  //
  // Requirements:
  // - SSA form (single definition for variables)
  // - block should be in linear order:
  //   - dominators *should* come before (a->b->c)
  //   - loop block sequences *should not* have intervening non-loop blocks

  auto arena = scratch_arena();

  // Step 1: renumber. Blocks get sequential ordinals and instructions get
  // sequential ordinals that keep counting across block boundaries.
  uint32_t next_block_ordinal = 0;
  uint32_t next_instr_ordinal = 0;
  for (auto block = builder->first_block(); block; block = block->next) {
    block->ordinal = next_block_ordinal++;
    for (auto instr = block->instr_head; instr; instr = instr->next) {
      instr->ordinal = next_instr_ordinal++;
    }
  }

  // Step 2: build one interval per defined value. Walking forward means the
  // resulting list is already ordered by start ordinal. Only dest values are
  // inspected since every value of interest must have a definition.
  // TODO(benvanik): ideally we would have a list of all values and not have
  //     to keep walking instructions over and over.
  Interval* first_interval = NULL;
  Interval* last_interval = NULL;
  for (auto block = builder->first_block(); block; block = block->next) {
    for (auto instr = block->instr_head; instr; instr = instr->next) {
      const OpcodeInfo* info = instr->opcode;
      if (GET_OPCODE_SIG_TYPE_DEST(info->signature) != OPCODE_SIG_TYPE_V) {
        // Instruction defines no value.
        continue;
      }

      auto v = instr->dest;
      if (!v->last_use) {
        ComputeLastUse(v);
      }

      // Describe the value's lifetime; a value with no uses ends at its def.
      auto interval = arena->Alloc<Interval>();
      interval->start_ordinal = instr->ordinal;
      if (v->last_use) {
        interval->end_ordinal = v->last_use->ordinal;
      } else {
        interval->end_ordinal = v->def->ordinal;
      }
      interval->value = v;

      // Append to the tail of the list being built.
      interval->next = NULL;
      interval->prev = last_interval;
      if (last_interval) {
        last_interval->next = interval;
      } else {
        first_interval = interval;
      }
      last_interval = interval;

      // Pick the register set once per interval so later scans only need to
      // compare intervals that share a set.
      if (v->type <= INT64_TYPE) {
        interval->free_until_set = free_until_sets_.int_set;
      } else if (v->type <= FLOAT64_TYPE) {
        interval->free_until_set = free_until_sets_.float_set;
      } else {
        interval->free_until_set = free_until_sets_.vec_set;
      }
    }
  }

  // Step 3: scan. Pop intervals in start order, retire expired active
  // intervals, then find (or free up) a register for the current one.
  Intervals intervals;
  intervals.unhandled = first_interval;
  intervals.active = NULL;
  intervals.handled = NULL;
  while (intervals.unhandled) {
    // Detach the next unhandled interval from the head of the list.
    auto current = intervals.unhandled;
    intervals.unhandled = current->next;
    current->RemoveFromList(&intervals.unhandled);

    // Anything active that ended at or before our start no longer needs its
    // register: move it from active to handled.
    for (auto it = intervals.active; it;) {
      auto following = it->next;
      if (it->end_ordinal <= current->start_ordinal) {
        it->RemoveFromList(&intervals.active);
        it->AddToList(&intervals.handled);
      }
      it = following;
    }

    // Find a register for current, spilling something if none is free.
    const bool found_free = TryAllocateFreeReg(current, intervals);
    if (!found_free) {
      AllocateBlockedReg(builder, current, intervals);
    }

    // Only intervals that actually received a register become active.
    if (current->value->reg.index != -1) {
      current->AddToList(&intervals.active);
    }
  }

  return 0;
}
|
||||
|
||||
// Stores in value->last_use the using instruction with the highest ordinal,
// or NULL when the value has no uses. When several uses share the highest
// ordinal, the one appearing latest in the use list wins.
void RegisterAllocationPass::ComputeLastUse(Value* value) {
  // TODO(benvanik): compute during construction?
  // The use list is not sorted (unfortunately), so every entry is visited.
  Value::Use* latest = NULL;
  uint32_t latest_ordinal = 0;
  for (auto use = value->use_head; use; use = use->next) {
    if (!latest || use->instr->ordinal >= latest_ordinal) {
      latest = use;
      latest_ordinal = use->instr->ordinal;
    }
  }

  if (latest) {
    value->last_use = latest->instr;
  } else {
    value->last_use = NULL;
  }
}
|
||||
|
||||
// Tries to give `current` a register that no active interval of the same
// register set is holding. On success writes the set/index into
// current->value->reg and returns true; returns false when every register
// in the set is taken (the caller is expected to spill).
bool RegisterAllocationPass::TryAllocateFreeReg(
    Interval* current, Intervals& intervals) {
  auto free_until_set = current->free_until_set;
  const uint32_t reg_count = free_until_set->count;

  // Start with every register free "forever" (the largest ordinal).
  for (uint32_t n = 0; n < reg_count; n++) {
    free_until_set->pos[n] = static_cast<uint32_t>(-1);
  }

  // Registers held by active intervals of this set are free until 0, i.e.
  // not free at all.
  // TODO(benvanik): keep some kind of bitvector so that this is instant?
  for (auto it = intervals.active; it; it = it->next) {
    if (it->free_until_set == free_until_set) {
      free_until_set->pos[it->value->reg.index] = 0;
    }
  }

  // Choose the register that stays free the longest; the lowest index wins
  // ties because only a strictly greater position replaces the candidate.
  uint32_t best_reg = 0;
  for (uint32_t n = 0; n < reg_count; n++) {
    if (free_until_set->pos[n] > free_until_set->pos[best_reg]) {
      best_reg = n;
    }
  }

  const uint32_t free_until = free_until_set->pos[best_reg];
  if (!free_until) {
    // No register available without spilling.
    return false;
  }
  if (current->end_ordinal >= free_until) {
    // Register available only for the first part of the interval.
    // Wimmer's algorithm would split the interval where it hits the next
    // one:
    //   current->value->reg = best_reg;
    //   SplitRange(current, free_until);
    // TODO(benvanik): actually split -- for now we just spill.
    return false;
  }

  // Register available for the whole interval.
  current->value->reg.set = free_until_set->set;
  current->value->reg.index = best_reg;
  return true;
}
|
||||
|
||||
// Frees a register for `current` by spilling one active interval from the
// same register set:
//   1. pick the first active interval of the set that does not start at the
//      same ordinal as `current` and that has room between its surrounding
//      uses,
//   2. store its value to a local slot (allocated on first spill) right
//      after its previous use,
//   3. reload it into a fresh value right before its next use and rename
//      all later uses to that value,
//   4. queue a new interval for the reloaded value and retire the old one,
//   5. retry the allocation for `current`.
//
// If no active interval qualifies, the function asserts and returns without
// assigning a register; `current` then keeps reg.index == -1.
void RegisterAllocationPass::AllocateBlockedReg(
    HIRBuilder* builder, Interval* current, Intervals& intervals) {
  auto free_until_set = current->free_until_set;

  // TODO(benvanik): smart heuristics.
  //     wimmer AllocateBlockedReg has some stuff for deciding whether to
  //     spill current or some other active interval - which we ignore.

  // Pick a random interval. Maybe the first. Sure.
  Interval* spill_interval = intervals.active;
  Value* spill_value = NULL;
  Instr* prev_use = NULL;
  Instr* next_use = NULL;
  for (; spill_interval; spill_interval = spill_interval->next) {
    if (spill_interval->free_until_set != free_until_set ||
        spill_interval->start_ordinal == current->start_ordinal) {
      // Only interested in ones of the same register set.
      // We also ensure that ones at the same ordinal as us are ignored,
      // which can happen with multiple local inserts/etc.
      continue;
    }
    spill_value = spill_interval->value;

    // Each candidate is judged on its own uses only; without this reset the
    // uses found for a previously rejected candidate would leak into the
    // comparison below.
    prev_use = NULL;
    next_use = NULL;

    // Find the uses right before/after current. Instructions with ordinal
    // -1 have not been numbered and are ignored.
    for (auto use = spill_value->use_head; use; use = use->next) {
      const auto use_ordinal = use->instr->ordinal;
      if (use_ordinal == -1) {
        continue;
      }
      if (use_ordinal < current->start_ordinal) {
        if (!prev_use || prev_use->ordinal < use_ordinal) {
          prev_use = use->instr;
        }
      } else if (use_ordinal > current->start_ordinal) {
        if (!next_use || next_use->ordinal > use_ordinal) {
          next_use = use->instr;
        }
      }
    }
    if (!prev_use) {
      // Not used before current: the definition acts as the previous use.
      prev_use = spill_value->def;
    }
    if (prev_use->next == next_use) {
      // Uh, this interval is way too short.
      continue;
    }
    XEASSERT(prev_use->ordinal != -1);
    XEASSERTNOTNULL(next_use);
    break;
  }

  // The scan may run off the end of the active list; bail out instead of
  // dereferencing a NULL interval below.
  XEASSERTNOTNULL(spill_interval);
  if (!spill_interval) {
    return;
  }
  XEASSERT(spill_interval->free_until_set == free_until_set);

  // Find the real last use -- paired ops may require sequences to stay
  // intact. This is a bad design.
  auto prev_def_tail = prev_use;
  while (prev_def_tail &&
         (prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV)) {
    prev_def_tail = prev_def_tail->prev;
  }

  // Instruction that becomes the final use of the spilled value: the
  // previous use when the value already lives in a slot, otherwise the
  // store inserted below.
  Instr* spill_last_use = prev_use;
  if (!spill_value->local_slot) {
    // First spill of this value: allocate a local slot and insert the store
    // right after the previous use (skipping back over paired ops).
    XEASSERTNOTNULL(prev_def_tail);
    spill_value->local_slot = builder->AllocLocal(spill_value->type);
    builder->StoreLocal(spill_value->local_slot, spill_value);
    auto spill_store = builder->last_instr();
    spill_store->MoveBefore(prev_def_tail->next);
    spill_last_use = spill_store;
  }

  // Truncate the spilled interval at current's start, remembering where it
  // used to end so the reloaded value can cover the remainder.
  const uint32_t end_ordinal = spill_interval->end_ordinal;
  spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal;
  XEASSERT(end_ordinal != -1);
  XEASSERT(spill_interval->end_ordinal != -1);

  // Insert a load right before the next use.
  Value* new_value = builder->LoadLocal(spill_value->local_slot);
  builder->last_instr()->MoveBefore(next_use);

  // Update last use info.
  new_value->last_use = spill_value->last_use;
  spill_value->last_use = spill_last_use;

  // Reuse the same local slot. Hooray SSA.
  new_value->local_slot = spill_value->local_slot;

  // Rename all future uses to that loaded value. The next pointer is read
  // before any set_srcN call in case rewriting an operand alters the use
  // list being walked.
  // TODO(benvanik): keep use list sorted so we don't have to do this.
  auto use = spill_value->use_head;
  while (use) {
    auto next = use->next;
    auto instr = use->instr;
    if (instr->ordinal <= spill_interval->end_ordinal ||
        instr->ordinal == -1) {
      // Use is covered by the truncated interval, or is an unnumbered
      // instruction: leave it reading the original value.
      use = next;
      continue;
    }
    uint32_t signature = instr->opcode->signature;
    if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src1.value == spill_value) {
        instr->set_src1(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src2.value == spill_value) {
        instr->set_src2(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src3.value == spill_value) {
        instr->set_src3(new_value);
      }
    }
    use = next;
  }

  // Create new interval for the reloaded value.
  // NOTE(review): the load was just created and has not been renumbered, so
  // new_value->def->ordinal is presumably still -1 (0xFFFFFFFF), which would
  // sort this interval at the tail of the unhandled list -- confirm.
  auto arena = scratch_arena();
  auto new_interval = arena->Alloc<Interval>();
  new_interval->start_ordinal = new_value->def->ordinal;
  new_interval->end_ordinal = end_ordinal;
  new_interval->value = new_value;
  new_interval->next = NULL;
  new_interval->prev = NULL;
  if (new_value->type <= INT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.int_set;
  } else if (new_value->type <= FLOAT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.float_set;
  } else {
    new_interval->free_until_set = free_until_sets_.vec_set;
  }

  // Remove the old interval from the active list, as it's been spilled.
  spill_interval->RemoveFromList(&intervals.active);
  spill_interval->AddToList(&intervals.handled);

  // Insert interval into the right place in the list.
  // We know it's ahead of us.
  new_interval->InsertIntoList(&intervals.unhandled);

  // TODO(benvanik): use the register we just freed?
  //current->value->reg.set = free_until_set->set;
  //current->value->reg.index = spill_interval->value->reg.index;
  bool allocated = TryAllocateFreeReg(current, intervals);
  XEASSERTTRUE(allocated);
}
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
|
||||
#include <alloy/backend/machine_info.h>
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
|
||||
class RegisterAllocationPass : public CompilerPass {
|
||||
public:
|
||||
RegisterAllocationPass(const backend::MachineInfo* machine_info);
|
||||
virtual ~RegisterAllocationPass();
|
||||
|
||||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
struct Interval;
|
||||
struct Intervals;
|
||||
void ComputeLastUse(hir::Value* value);
|
||||
bool TryAllocateFreeReg(Interval* current, Intervals& intervals);
|
||||
void AllocateBlockedReg(hir::HIRBuilder* builder,
|
||||
Interval* current, Intervals& intervals);
|
||||
|
||||
private:
|
||||
const backend::MachineInfo* machine_info_;
|
||||
|
||||
struct RegisterFreeUntilSet {
|
||||
uint32_t count;
|
||||
uint32_t pos[32];
|
||||
const backend::MachineInfo::RegisterSet* set;
|
||||
};
|
||||
struct RegisterFreeUntilSets {
|
||||
RegisterFreeUntilSet* int_set;
|
||||
RegisterFreeUntilSet* float_set;
|
||||
RegisterFreeUntilSet* vec_set;
|
||||
RegisterFreeUntilSet* all_sets[3];
|
||||
};
|
||||
RegisterFreeUntilSets free_until_sets_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
|
@ -15,6 +15,8 @@
|
|||
'finalization_pass.h',
|
||||
#'dead_store_elimination_pass.cc',
|
||||
#'dead_store_elimination_pass.h',
|
||||
'register_allocation_pass.cc',
|
||||
'register_allocation_pass.h',
|
||||
'simplification_pass.cc',
|
||||
'simplification_pass.h',
|
||||
'validation_pass.cc',
|
||||
|
|
|
@ -46,7 +46,7 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
|
|||
// Passes are executed in the order they are added. Multiple of the same
|
||||
// pass type may be used.
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
//compiler_->AddPass(new passes::ContextPromotionPass());
|
||||
compiler_->AddPass(new passes::ContextPromotionPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
compiler_->AddPass(new passes::SimplificationPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
|
@ -59,18 +59,16 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
|
|||
compiler_->AddPass(new passes::DeadCodeEliminationPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
|
||||
// Adds local load/stores.
|
||||
compiler_->AddPass(new passes::DataFlowAnalysisPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
compiler_->AddPass(new passes::SimplificationPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
//// Removes all unneeded variables. Try not to add new ones after this.
|
||||
//compiler_->AddPass(new passes::ValueReductionPass());
|
||||
//if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
|
||||
// Run DCE one more time to cleanup any local manipulation.
|
||||
compiler_->AddPass(new passes::DeadCodeEliminationPass());
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
|
||||
// Removes all unneeded variables. Try not to add new ones after this.
|
||||
compiler_->AddPass(new passes::ValueReductionPass());
|
||||
// Register allocation for the target backend.
|
||||
// Will modify the HIR to add loads/stores.
|
||||
// This should be the last pass before finalization, as after this all
|
||||
// registers are assigned and ready to be emitted.
|
||||
compiler_->AddPass(new passes::RegisterAllocationPass(
|
||||
backend->machine_info()));
|
||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||
|
||||
// Must come last. The HIR is not really HIR after this.
|
||||
|
|
|
@ -108,6 +108,9 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
|
|||
};
|
||||
str->Append("v%d.%s", value->ordinal, type_names[value->type]);
|
||||
}
|
||||
if (value->reg.index != -1) {
|
||||
str->Append("<%s%d>", value->reg.set->name, value->reg.index);
|
||||
}
|
||||
}
|
||||
|
||||
void HIRBuilder::DumpOp(
|
||||
|
@ -453,6 +456,7 @@ Instr* HIRBuilder::AppendInstr(
|
|||
if (!block->instr_head) {
|
||||
block->instr_head = instr;
|
||||
}
|
||||
instr->ordinal = -1;
|
||||
instr->block = block;
|
||||
instr->opcode = &opcode_info;
|
||||
instr->flags = flags;
|
||||
|
@ -477,7 +481,8 @@ Value* HIRBuilder::AllocValue(TypeName type) {
|
|||
value->last_use = NULL;
|
||||
value->local_slot = NULL;
|
||||
value->tag = NULL;
|
||||
value->reg = -1;
|
||||
value->reg.set = NULL;
|
||||
value->reg.index = -1;
|
||||
return value;
|
||||
}
|
||||
|
||||
|
@ -492,7 +497,8 @@ Value* HIRBuilder::CloneValue(Value* source) {
|
|||
value->last_use = NULL;
|
||||
value->local_slot = NULL;
|
||||
value->tag = NULL;
|
||||
value->reg = -1;
|
||||
value->reg.set = NULL;
|
||||
value->reg.index = -1;
|
||||
return value;
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ public:
|
|||
|
||||
const OpcodeInfo* opcode;
|
||||
uint16_t flags;
|
||||
uint16_t ordinal;
|
||||
uint32_t ordinal;
|
||||
|
||||
typedef union {
|
||||
runtime::FunctionInfo* symbol_info;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define ALLOY_HIR_VALUE_H_
|
||||
|
||||
#include <alloy/core.h>
|
||||
#include <alloy/backend/machine_info.h>
|
||||
#include <alloy/hir/opcodes.h>
|
||||
|
||||
|
||||
|
@ -90,7 +91,10 @@ public:
|
|||
TypeName type;
|
||||
|
||||
uint32_t flags;
|
||||
uint32_t reg;
|
||||
struct {
|
||||
const backend::MachineInfo::RegisterSet* set;
|
||||
int32_t index;
|
||||
} reg;
|
||||
ConstantValue constant;
|
||||
|
||||
Instr* def;
|
||||
|
|
|
@ -145,6 +145,7 @@ typedef XECACHEALIGN volatile void xe_aligned_void_t;
|
|||
// Rounds v up to the nearest power of two. Exact powers of two are returned
// unchanged; 0 and anything above 0x80000000 wrap around to 0.
static inline uint32_t XENEXTPOW2(uint32_t v) {
  // Copy the highest set bit of (v - 1) into every lower bit position, then
  // add one to carry into the next power of two.
  uint32_t bits = v - 1;
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
|
||||
// Rounds `value` up to the next multiple of `align`; `align` must be a power
// of two. Both arguments are parenthesized so that expression arguments
// (e.g. `1 << 4`) expand with the intended precedence. Each argument is
// evaluated more than once, so avoid side effects in them.
#define XEALIGN(value, align) (((value) + (align) - 1) & ~((align) - 1))
|
||||
|
||||
#define XESUCCEED() goto XECLEANUP
|
||||
#define XEFAIL() goto XECLEANUP
|
||||
|
|
Loading…
Reference in New Issue