A probably-working register allocator.
This commit is contained in:
parent
6bd214af0b
commit
4a584129d2
|
@ -74,15 +74,19 @@ int IVMAssembler::Assemble(
|
||||||
builder->ResetLabelTags();
|
builder->ResetLabelTags();
|
||||||
|
|
||||||
// Function prologue.
|
// Function prologue.
|
||||||
size_t stack_size = 0;
|
size_t stack_offset = 0;
|
||||||
auto locals = builder->locals();
|
auto locals = builder->locals();
|
||||||
for (auto it = locals.begin(); it != locals.end(); ++it) {
|
for (auto it = locals.begin(); it != locals.end(); ++it) {
|
||||||
auto slot = *it;
|
auto slot = *it;
|
||||||
size_t stack_offset = stack_size;
|
size_t type_size = GetTypeSize(slot->type);
|
||||||
|
// Align to natural size.
|
||||||
|
stack_offset = XEALIGN(stack_offset, type_size);
|
||||||
slot->set_constant(stack_offset);
|
slot->set_constant(stack_offset);
|
||||||
stack_size += GetTypeSize(slot->type);
|
stack_offset += type_size;
|
||||||
}
|
}
|
||||||
ctx.stack_size = stack_size;
|
// Ensure 16b alignment.
|
||||||
|
stack_offset = XEALIGN(stack_offset, 16);
|
||||||
|
ctx.stack_size = stack_offset;
|
||||||
|
|
||||||
auto block = builder->first_block();
|
auto block = builder->first_block();
|
||||||
while (block) {
|
while (block) {
|
||||||
|
|
|
@ -38,14 +38,14 @@ int IVMBackend::Initialize() {
|
||||||
0,
|
0,
|
||||||
"gpr",
|
"gpr",
|
||||||
MachineInfo::RegisterSet::INT_TYPES,
|
MachineInfo::RegisterSet::INT_TYPES,
|
||||||
10,
|
6,
|
||||||
};
|
};
|
||||||
machine_info_.register_sets[1] = {
|
machine_info_.register_sets[1] = {
|
||||||
1,
|
1,
|
||||||
"vec",
|
"vec",
|
||||||
MachineInfo::RegisterSet::FLOAT_TYPES |
|
MachineInfo::RegisterSet::FLOAT_TYPES |
|
||||||
MachineInfo::RegisterSet::VEC_TYPES,
|
MachineInfo::RegisterSet::VEC_TYPES,
|
||||||
10,
|
6,
|
||||||
};
|
};
|
||||||
|
|
||||||
alloy::tracing::WriteEvent(EventType::Init({
|
alloy::tracing::WriteEvent(EventType::Init({
|
||||||
|
|
|
@ -54,7 +54,7 @@ int X64Emitter::Initialize() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int X64Emitter::Emit(
|
int X64Emitter::Emit(
|
||||||
HIRBuilder* builder,
|
HIRBuilder* builder,
|
||||||
uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
|
uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
|
||||||
void*& out_code_address, size_t& out_code_size) {
|
void*& out_code_address, size_t& out_code_size) {
|
||||||
// Reset.
|
// Reset.
|
||||||
|
@ -98,8 +98,6 @@ void* X64Emitter::Emplace(size_t stack_size) {
|
||||||
return new_address;
|
return new_address;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define XEALIGN(value, align) ((value + align - 1) & ~(align - 1))
|
|
||||||
|
|
||||||
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||||
// These are the registers we will not be using. All others are fair game.
|
// These are the registers we will not be using. All others are fair game.
|
||||||
const uint32_t reserved_regs =
|
const uint32_t reserved_regs =
|
||||||
|
@ -220,7 +218,7 @@ void X64Emitter::ResetRegisters(uint32_t reserved_regs) {
|
||||||
if (live_regs & 0x1) {
|
if (live_regs & 0x1) {
|
||||||
auto v = reg_state_.reg_values[n];
|
auto v = reg_state_.reg_values[n];
|
||||||
if (v) {
|
if (v) {
|
||||||
v->reg = -1;
|
v->reg.index = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reg_state_.reg_values[n] = 0;
|
reg_state_.reg_values[n] = 0;
|
||||||
|
|
|
@ -15,8 +15,9 @@
|
||||||
#include <alloy/compiler/passes/context_promotion_pass.h>
|
#include <alloy/compiler/passes/context_promotion_pass.h>
|
||||||
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
|
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
|
||||||
#include <alloy/compiler/passes/dead_code_elimination_pass.h>
|
#include <alloy/compiler/passes/dead_code_elimination_pass.h>
|
||||||
|
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
|
||||||
#include <alloy/compiler/passes/finalization_pass.h>
|
#include <alloy/compiler/passes/finalization_pass.h>
|
||||||
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
|
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||||
#include <alloy/compiler/passes/simplification_pass.h>
|
#include <alloy/compiler/passes/simplification_pass.h>
|
||||||
#include <alloy/compiler/passes/validation_pass.h>
|
#include <alloy/compiler/passes/validation_pass.h>
|
||||||
#include <alloy/compiler/passes/value_reduction_pass.h>
|
#include <alloy/compiler/passes/value_reduction_pass.h>
|
||||||
|
@ -137,5 +138,42 @@
|
||||||
// store_context +302, v5
|
// store_context +302, v5
|
||||||
// branch_true v5, ...
|
// branch_true v5, ...
|
||||||
//
|
//
|
||||||
|
// - X86Canonicalization
|
||||||
|
// For various opcodes add copies/commute the arguments to match x86
|
||||||
|
// operand semantics. This makes code generation easier and if done
|
||||||
|
// before register allocation can prevent a lot of extra shuffling in
|
||||||
|
// the emitted code.
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
// <block0>:
|
||||||
|
// v0 = ...
|
||||||
|
// v1 = ...
|
||||||
|
// v2 = add v0, v1 <-- v1 now unused
|
||||||
|
// Becomes:
|
||||||
|
// v0 = ...
|
||||||
|
// v1 = ...
|
||||||
|
// v1 = add v1, v0 <-- src1 = dest/src, so reuse for both
|
||||||
|
// by commuting and setting dest = src1
|
||||||
|
//
|
||||||
|
// - RegisterAllocation
|
||||||
|
// Given a machine description (register classes, counts) run over values
|
||||||
|
// and assign them to registers, adding spills as needed. It should be
|
||||||
|
// possible to directly emit code from this form.
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
// <block0>:
|
||||||
|
// v0 = load_context +0
|
||||||
|
// v1 = load_context +1
|
||||||
|
// v0 = add v0, v1
|
||||||
|
// ...
|
||||||
|
// v2 = mul v0, v1
|
||||||
|
// Becomes:
|
||||||
|
// reg0 = load_context +0
|
||||||
|
// reg1 = load_context +1
|
||||||
|
// reg2 = add reg0, reg1
|
||||||
|
// store_local +123, reg2 <-- spill inserted
|
||||||
|
// ...
|
||||||
|
// reg0 = load_local +123 <-- load inserted
|
||||||
|
// reg0 = mul reg0, reg1
|
||||||
|
|
||||||
#endif // ALLOY_COMPILER_COMPILER_PASSES_H_
|
#endif // ALLOY_COMPILER_COMPILER_PASSES_H_
|
||||||
|
|
|
@ -41,19 +41,21 @@ int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||||
// Add edges.
|
// Add edges.
|
||||||
auto block = builder->first_block();
|
auto block = builder->first_block();
|
||||||
while (block) {
|
while (block) {
|
||||||
auto instr = block->instr_head;
|
auto instr = block->instr_tail;
|
||||||
while (instr) {
|
while (instr) {
|
||||||
if (instr->opcode->flags & OPCODE_FLAG_BRANCH) {
|
if (instr->opcode->flags & OPCODE_FLAG_BRANCH) {
|
||||||
if (instr->opcode == &OPCODE_BRANCH_info) {
|
if (instr->opcode == &OPCODE_BRANCH_info) {
|
||||||
auto label = instr->src1.label;
|
auto label = instr->src1.label;
|
||||||
builder->AddEdge(block, label->block, Edge::UNCONDITIONAL);
|
builder->AddEdge(block, label->block, Edge::UNCONDITIONAL);
|
||||||
|
break;
|
||||||
} else if (instr->opcode == &OPCODE_BRANCH_TRUE_info ||
|
} else if (instr->opcode == &OPCODE_BRANCH_TRUE_info ||
|
||||||
instr->opcode == &OPCODE_BRANCH_FALSE_info) {
|
instr->opcode == &OPCODE_BRANCH_FALSE_info) {
|
||||||
auto label = instr->src2.label;
|
auto label = instr->src2.label;
|
||||||
builder->AddEdge(block, label->block, 0);
|
builder->AddEdge(block, label->block, 0);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
instr = instr->next;
|
instr = instr->prev;
|
||||||
}
|
}
|
||||||
block = block->next;
|
block = block->next;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,471 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||||
|
|
||||||
|
using namespace alloy;
|
||||||
|
using namespace alloy::backend;
|
||||||
|
using namespace alloy::compiler;
|
||||||
|
using namespace alloy::compiler::passes;
|
||||||
|
using namespace alloy::hir;
|
||||||
|
|
||||||
|
|
||||||
|
// A live range for a single value, expressed in global instruction ordinals.
// Intervals are arena allocated and carry intrusive doubly-linked list
// pointers so they can be moved between the unhandled/active/handled lists
// without any extra allocation. An interval must only be in one list at a
// time.
struct RegisterAllocationPass::Interval {
  // Ordinal of the instruction defining the value.
  uint32_t start_ordinal;
  // Ordinal of the last use of the value (or of the def if it has no uses).
  // Shortened when the interval is spilled.
  uint32_t end_ordinal;
  // Value this interval covers.
  Value* value;
  // Register set (int/float/vec) the value must be allocated from.
  RegisterFreeUntilSet* free_until_set;
  // TODO(benvanik): reduce to offsets in arena?
  struct Interval* next;
  struct Interval* prev;

  // Pushes this interval onto the front of the list at *list_head.
  void AddToList(Interval** list_head) {
    auto list_next = *list_head;
    this->next = list_next;
    // We become the head, so never keep a stale back pointer around.
    this->prev = NULL;
    if (list_next) {
      list_next->prev = this;
    }
    *list_head = this;
  }

  // Inserts this interval into the list at *list_head, which is kept sorted
  // by ascending start_ordinal. The interval is placed before the first
  // entry with a greater start ordinal, or at the tail if there is none.
  void InsertIntoList(Interval** list_head) {
    auto it = *list_head;
    if (!it) {
      // Empty list: we are the only entry. Without this the interval would
      // be silently dropped and its value would never get a register.
      this->next = this->prev = NULL;
      *list_head = this;
      return;
    }
    while (it) {
      if (it->start_ordinal > this->start_ordinal) {
        // Went too far. Insert before this interval.
        this->prev = it->prev;
        this->next = it;
        if (it->prev) {
          it->prev->next = this;
        } else {
          *list_head = this;
        }
        it->prev = this;
        return;
      }
      if (!it->next) {
        // None found, add at tail.
        it->next = this;
        this->prev = it;
        this->next = NULL;
        return;
      }
      it = it->next;
    }
  }

  // Unlinks this interval from the list at *list_head and clears its links.
  void RemoveFromList(Interval** list_head) {
    if (this->next) {
      this->next->prev = this->prev;
    }
    if (this->prev) {
      this->prev->next = this->next;
    } else {
      // We were the head; the list now starts at our successor.
      *list_head = this->next;
    }
    this->next = this->prev = NULL;
  }
};
|
||||||
|
|
||||||
|
// The working lists of the linear scan. Each is the head of an intrusive
// list threaded through Interval::next/prev.
struct RegisterAllocationPass::Intervals {
  // Intervals still waiting to be assigned a register.
  Interval* unhandled;
  // Intervals that currently hold a register.
  Interval* active;
  // Intervals that have ended or have been spilled.
  Interval* handled;
};
|
||||||
|
|
||||||
|
// Builds one RegisterFreeUntilSet per register set described by the machine
// info and indexes them by the value types (int/float/vec) they can hold.
//
// machine_info: description of the target register sets. The sets are read
//     in order until the first entry with a zero count. The pointer is
//     retained, as each free-until set keeps a pointer into it.
RegisterAllocationPass::RegisterAllocationPass(
    const MachineInfo* machine_info) :
    CompilerPass(),
    machine_info_(machine_info) {
  // Initialize register sets. The values of these will be
  // cleared before use, so just the structure is required.
  xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_));
  auto mi_sets = machine_info->register_sets;
  // Stop at the first unused set, but never run past the storage we have
  // for them; any additional sets are ignored.
  for (uint32_t n = 0;
       n < XECOUNT(free_until_sets_.all_sets) && mi_sets[n].count;
       n++) {
    auto& mi_set = mi_sets[n];
    auto free_until_set = new RegisterFreeUntilSet();
    // pos[] is fixed size; a larger register set would overflow it.
    XEASSERT(mi_set.count <= XECOUNT(free_until_set->pos));
    free_until_sets_.all_sets[n] = free_until_set;
    free_until_set->count = mi_set.count;
    free_until_set->set = &mi_set;
    // A single set may serve several type classes (e.g. float + vec).
    if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
      free_until_sets_.int_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
      free_until_sets_.float_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
      free_until_sets_.vec_set = free_until_set;
    }
  }
}
|
||||||
|
|
||||||
|
// Frees the free-until sets allocated by the constructor.
RegisterAllocationPass::~RegisterAllocationPass() {
  // Slots are filled from index 0 with no gaps, so the first empty slot
  // marks the end of the allocated sets.
  size_t n = 0;
  while (n < XECOUNT(free_until_sets_.all_sets) &&
         free_until_sets_.all_sets[n]) {
    delete free_until_sets_.all_sets[n];
    n++;
  }
}
|
||||||
|
|
||||||
|
// Assigns a register to every value defined in the function, inserting
// spills/loads where no register is free. Always returns 0.
int RegisterAllocationPass::Run(HIRBuilder* builder) {
  // A (probably broken) implementation of a linear scan register allocator
  // that operates directly on SSA form:
  // http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf
  //
  // Requirements:
  // - SSA form (single definition for variables)
  // - block should be in linear order:
  //   - dominators *should* come before (a->b->c)
  //   - loop block sequences *should not* have intervening non-loop blocks

  auto arena = scratch_arena();

  // Step 1: renumber everything. Blocks get sequential ordinals and
  // instructions get sequential ordinals that run across all blocks.
  uint32_t next_block_ordinal = 0;
  uint32_t next_instr_ordinal = 0;
  for (auto block = builder->first_block(); block; block = block->next) {
    block->ordinal = next_block_ordinal++;
    for (auto instr = block->instr_head; instr; instr = instr->next) {
      instr->ordinal = next_instr_ordinal++;
    }
  }

  // Step 2: build one interval per defined value by walking forward through
  // all blocks/instructions. As instructions are visited in ordinal order
  // the resulting list is sorted by start ordinal.
  // Since all values of importance must be defined, only dest is inspected.
  // TODO(benvanik): ideally we would have a list of all values and not have
  //                 to keep walking instructions over and over.
  Interval* list_head = NULL;
  Interval* list_tail = NULL;
  for (auto block = builder->first_block(); block; block = block->next) {
    for (auto instr = block->instr_head; instr; instr = instr->next) {
      const OpcodeInfo* info = instr->opcode;
      if (GET_OPCODE_SIG_TYPE_DEST(info->signature) != OPCODE_SIG_TYPE_V) {
        // Instruction does not define a value.
        continue;
      }
      auto v = instr->dest;
      if (!v->last_use) {
        ComputeLastUse(v);
      }

      // The interval runs from the def to the last use, or covers only the
      // def when the value is never used.
      auto interval = arena->Alloc<Interval>();
      interval->start_ordinal = instr->ordinal;
      if (v->last_use) {
        interval->end_ordinal = v->last_use->ordinal;
      } else {
        interval->end_ordinal = v->def->ordinal;
      }
      interval->value = v;

      // Append to the tail of the list.
      interval->next = NULL;
      interval->prev = list_tail;
      if (list_tail) {
        list_tail->next = interval;
      } else {
        list_head = interval;
      }
      list_tail = interval;

      // Grab register set to use.
      // We do this now so it's only once per interval, and it makes it easy
      // to only compare intervals that overlap their sets.
      if (v->type <= INT64_TYPE) {
        interval->free_until_set = free_until_sets_.int_set;
      } else if (v->type <= FLOAT64_TYPE) {
        interval->free_until_set = free_until_sets_.float_set;
      } else {
        interval->free_until_set = free_until_sets_.vec_set;
      }
    }
  }

  // Step 3: the scan itself. Pop intervals in start order, retire the
  // active ones that have ended, then give the popped interval a register.
  Intervals intervals;
  intervals.unhandled = list_head;
  intervals.active = NULL;
  intervals.handled = NULL;
  while (intervals.unhandled) {
    // Get next unhandled interval.
    auto current = intervals.unhandled;
    intervals.unhandled = intervals.unhandled->next;
    current->RemoveFromList(&intervals.unhandled);

    // Move every active interval that ended at or before the start of
    // current over to handled. The successor is fetched first because
    // moving an interval rewrites its links.
    for (auto it = intervals.active; it;) {
      auto following = it->next;
      if (it->end_ordinal <= current->start_ordinal) {
        it->RemoveFromList(&intervals.active);
        it->AddToList(&intervals.handled);
      }
      it = following;
    }

    // Find a register for current, spilling another interval if needed.
    const bool got_free_reg = TryAllocateFreeReg(current, intervals);
    if (!got_free_reg) {
      AllocateBlockedReg(builder, current, intervals);
    }

    // Only intervals that actually hold a register become active.
    if (current->value->reg.index != -1) {
      current->AddToList(&intervals.active);
    }
  }

  return 0;
}
|
||||||
|
|
||||||
|
// Sets value->last_use to the using instruction with the highest ordinal,
// or to NULL when the value has no uses. On equal ordinals the use that
// appears later in the use list wins.
void RegisterAllocationPass::ComputeLastUse(Value* value) {
  // TODO(benvanik): compute during construction?
  // The use list is not sorted (unfortunately), so every entry is visited.
  Value::Use* latest_use = NULL;
  uint32_t latest_ordinal = 0;
  for (auto use = value->use_head; use; use = use->next) {
    if (latest_use && use->instr->ordinal < latest_ordinal) {
      // Strictly earlier than the best candidate so far.
      continue;
    }
    latest_use = use;
    latest_ordinal = use->instr->ordinal;
  }
  if (latest_use) {
    value->last_use = latest_use->instr;
  } else {
    value->last_use = NULL;
  }
}
|
||||||
|
|
||||||
|
// Tries to give current a register that no active interval is holding.
//
// current: interval to allocate; on success current->value->reg is set.
// intervals: working lists; only the active list is read.
// Returns true if a register was assigned, false if the caller must spill.
bool RegisterAllocationPass::TryAllocateFreeReg(
    Interval* current, Intervals& intervals) {
  auto free_until_set = current->free_until_set;

  // Position value meaning "never blocked" (the largest uint32_t).
  const uint32_t kFreeForever = 0xFFFFFFFFu;

  // Reset all registers in the set to unused.
  for (uint32_t n = 0; n < free_until_set->count; n++) {
    free_until_set->pos[n] = kFreeForever;
  }

  // Mark all registers held by active intervals of the same set as used
  // (free until position 0).
  // TODO(benvanik): keep some kind of bitvector so that this is instant?
  for (auto it = intervals.active; it; it = it->next) {
    if (it->free_until_set == free_until_set) {
      free_until_set->pos[it->value->reg.index] = 0;
    }
  }

  // Pick the register that stays free the longest; the lowest index wins
  // ties.
  uint32_t max_pos = 0;
  for (uint32_t n = 1; n < free_until_set->count; n++) {
    if (free_until_set->pos[n] > free_until_set->pos[max_pos]) {
      max_pos = n;
    }
  }

  if (!free_until_set->pos[max_pos]) {
    // No register available without spilling.
    return false;
  }
  if (current->end_ordinal >= free_until_set->pos[max_pos]) {
    // Register available for the first part of the interval only.
    // The interval should be split at where it hits the next one.
    // TODO(benvanik): actually split -- for now we just spill.
    return false;
  }

  // Register available for the whole interval.
  current->value->reg.set = free_until_set->set;
  current->value->reg.index = max_pos;
  return true;
}
|
||||||
|
|
||||||
|
// Frees up a register for current by spilling an active interval.
//
// The victim's value is stored to a local slot (allocated on first spill),
// its interval is cut short at the start of current, and a load is inserted
// before its next use. All later uses are renamed to the loaded value, which
// gets a fresh interval queued on the unhandled list.
//
// builder: used to allocate local slots and insert the store/load.
// current: interval that needs a register.
// intervals: working lists; active/handled/unhandled are all modified.
void RegisterAllocationPass::AllocateBlockedReg(
    HIRBuilder* builder, Interval* current, Intervals& intervals) {
  auto free_until_set = current->free_until_set;

  // TODO(benvanik): smart heuristics.
  //     wimmer AllocateBlockedReg has some stuff for deciding whether to
  //     spill current or some other active interval - which we ignore.

  // Pick the first active interval that can be split around current.
  Interval* spill_interval = intervals.active;
  Value* spill_value = NULL;
  Instr* prev_use = NULL;
  Instr* next_use = NULL;
  for (; spill_interval; spill_interval = spill_interval->next) {
    if (spill_interval->free_until_set != free_until_set ||
        spill_interval->start_ordinal == current->start_ordinal) {
      // Only interested in ones of the same register set.
      // We also ensure that ones at the same ordinal as us are ignored,
      // which can happen with multiple local inserts/etc.
      continue;
    }
    spill_value = spill_interval->value;

    // Find the uses right before/after current. These must be reset for
    // every candidate so that uses of a previously rejected value cannot
    // leak into the result for this one.
    prev_use = NULL;
    next_use = NULL;
    for (auto use = spill_value->use_head; use; use = use->next) {
      if (use->instr->ordinal == -1) {
        // Instruction inserted by this pass (not yet numbered).
        continue;
      }
      if (use->instr->ordinal < current->start_ordinal) {
        if (!prev_use || prev_use->ordinal < use->instr->ordinal) {
          prev_use = use->instr;
        }
      } else if (use->instr->ordinal > current->start_ordinal) {
        if (!next_use || next_use->ordinal > use->instr->ordinal) {
          next_use = use->instr;
        }
      }
    }
    if (!prev_use) {
      // No use before current, so the value is live from its def.
      prev_use = spill_value->def;
    }
    if (prev_use->next == next_use) {
      // Uh, this interval is way too short.
      continue;
    }
    XEASSERT(prev_use->ordinal != -1);
    XEASSERTNOTNULL(next_use);
    break;
  }

  // No candidate could be spilled: leave current without a register
  // instead of dereferencing a NULL interval below.
  XEASSERTNOTNULL(spill_interval);
  if (!spill_interval) {
    return;
  }
  XEASSERT(spill_interval->free_until_set == free_until_set);

  // Find the real last use -- paired ops may require sequences to stay
  // intact. This is a bad design.
  auto prev_def_tail = prev_use;
  while (prev_def_tail &&
         (prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV)) {
    prev_def_tail = prev_def_tail->prev;
  }

  // If the value already has a slot it has been stored before and only
  // needs reloading; its last use becomes the use right before current.
  // Otherwise a slot is allocated and a store is inserted, and that store
  // becomes the last use.
  Instr* spilled_last_use = prev_use;
  if (!spill_value->local_slot) {
    // Allocate a local slot.
    spill_value->local_slot = builder->AllocLocal(spill_value->type);

    // Insert a spill right after the def.
    builder->StoreLocal(spill_value->local_slot, spill_value);
    auto spill_store = builder->last_instr();
    spill_store->MoveBefore(prev_def_tail->next);
    spilled_last_use = spill_store;
  }

  // Cut the spilled interval short at the start of current, remembering
  // where it used to end for the replacement interval.
  uint32_t end_ordinal = spill_interval->end_ordinal;
  spill_interval->end_ordinal = current->start_ordinal;
  XEASSERT(end_ordinal != -1);
  XEASSERT(spill_interval->end_ordinal != -1);

  // Insert a load right before the next use.
  Value* new_value = builder->LoadLocal(spill_value->local_slot);
  builder->last_instr()->MoveBefore(next_use);

  // Update last use info.
  new_value->last_use = spill_value->last_use;
  spill_value->last_use = spilled_last_use;

  // Reuse the same local slot. Hooray SSA.
  new_value->local_slot = spill_value->local_slot;

  // Rename all future uses to that loaded value.
  // The successor is fetched before renaming, as it would otherwise be read
  // from the use entry after set_srcN has been called on its instruction.
  auto use = spill_value->use_head;
  while (use) {
    // TODO(benvanik): keep use list sorted so we don't have to do this.
    if (use->instr->ordinal <= spill_interval->end_ordinal ||
        use->instr->ordinal == -1) {
      // Use is before the split, or is an instruction inserted by this
      // pass (such as the spill store).
      use = use->next;
      continue;
    }
    auto next = use->next;
    auto instr = use->instr;
    uint32_t signature = instr->opcode->signature;
    if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src1.value == spill_value) {
        instr->set_src1(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src2.value == spill_value) {
        instr->set_src2(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src3.value == spill_value) {
        instr->set_src3(new_value);
      }
    }
    use = next;
  }

  // Create new interval covering the reloaded value.
  auto arena = scratch_arena();
  auto new_interval = arena->Alloc<Interval>();
  new_interval->start_ordinal = new_value->def->ordinal;
  new_interval->end_ordinal = end_ordinal;
  new_interval->value = new_value;
  new_interval->next = NULL;
  new_interval->prev = NULL;
  if (new_value->type <= INT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.int_set;
  } else if (new_value->type <= FLOAT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.float_set;
  } else {
    new_interval->free_until_set = free_until_sets_.vec_set;
  }

  // Remove the old interval from the active list, as it's been spilled.
  spill_interval->RemoveFromList(&intervals.active);
  spill_interval->AddToList(&intervals.handled);

  // Insert interval into the right place in the list.
  // We know it's ahead of us.
  new_interval->InsertIntoList(&intervals.unhandled);

  // The spilled interval is no longer active, so its register is free now.
  // TODO(benvanik): use the register we just freed directly?
  bool allocated = TryAllocateFreeReg(current, intervals);
  XEASSERTTRUE(allocated);
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||||
|
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||||
|
|
||||||
|
#include <alloy/backend/machine_info.h>
|
||||||
|
#include <alloy/compiler/compiler_pass.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace alloy {
|
||||||
|
namespace compiler {
|
||||||
|
namespace passes {
|
||||||
|
|
||||||
|
|
||||||
|
class RegisterAllocationPass : public CompilerPass {
|
||||||
|
public:
|
||||||
|
RegisterAllocationPass(const backend::MachineInfo* machine_info);
|
||||||
|
virtual ~RegisterAllocationPass();
|
||||||
|
|
||||||
|
virtual int Run(hir::HIRBuilder* builder);
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Interval;
|
||||||
|
struct Intervals;
|
||||||
|
void ComputeLastUse(hir::Value* value);
|
||||||
|
bool TryAllocateFreeReg(Interval* current, Intervals& intervals);
|
||||||
|
void AllocateBlockedReg(hir::HIRBuilder* builder,
|
||||||
|
Interval* current, Intervals& intervals);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const backend::MachineInfo* machine_info_;
|
||||||
|
|
||||||
|
struct RegisterFreeUntilSet {
|
||||||
|
uint32_t count;
|
||||||
|
uint32_t pos[32];
|
||||||
|
const backend::MachineInfo::RegisterSet* set;
|
||||||
|
};
|
||||||
|
struct RegisterFreeUntilSets {
|
||||||
|
RegisterFreeUntilSet* int_set;
|
||||||
|
RegisterFreeUntilSet* float_set;
|
||||||
|
RegisterFreeUntilSet* vec_set;
|
||||||
|
RegisterFreeUntilSet* all_sets[3];
|
||||||
|
};
|
||||||
|
RegisterFreeUntilSets free_until_sets_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace passes
|
||||||
|
} // namespace compiler
|
||||||
|
} // namespace alloy
|
||||||
|
|
||||||
|
|
||||||
|
#endif // ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
|
@ -15,6 +15,8 @@
|
||||||
'finalization_pass.h',
|
'finalization_pass.h',
|
||||||
#'dead_store_elimination_pass.cc',
|
#'dead_store_elimination_pass.cc',
|
||||||
#'dead_store_elimination_pass.h',
|
#'dead_store_elimination_pass.h',
|
||||||
|
'register_allocation_pass.cc',
|
||||||
|
'register_allocation_pass.h',
|
||||||
'simplification_pass.cc',
|
'simplification_pass.cc',
|
||||||
'simplification_pass.h',
|
'simplification_pass.h',
|
||||||
'validation_pass.cc',
|
'validation_pass.cc',
|
||||||
|
|
|
@ -46,7 +46,7 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
|
||||||
// Passes are executed in the order they are added. Multiple of the same
|
// Passes are executed in the order they are added. Multiple of the same
|
||||||
// pass type may be used.
|
// pass type may be used.
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
//compiler_->AddPass(new passes::ContextPromotionPass());
|
compiler_->AddPass(new passes::ContextPromotionPass());
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
compiler_->AddPass(new passes::SimplificationPass());
|
compiler_->AddPass(new passes::SimplificationPass());
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
|
@ -59,18 +59,16 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
|
||||||
compiler_->AddPass(new passes::DeadCodeEliminationPass());
|
compiler_->AddPass(new passes::DeadCodeEliminationPass());
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
|
|
||||||
// Adds local load/stores.
|
//// Removes all unneeded variables. Try not to add new ones after this.
|
||||||
compiler_->AddPass(new passes::DataFlowAnalysisPass());
|
//compiler_->AddPass(new passes::ValueReductionPass());
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
//if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
compiler_->AddPass(new passes::SimplificationPass());
|
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
|
||||||
|
|
||||||
// Run DCE one more time to cleanup any local manipulation.
|
// Register allocation for the target backend.
|
||||||
compiler_->AddPass(new passes::DeadCodeEliminationPass());
|
// Will modify the HIR to add loads/stores.
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
// This should be the last pass before finalization, as after this all
|
||||||
|
// registers are assigned and ready to be emitted.
|
||||||
// Removes all unneeded variables. Try not to add new ones after this.
|
compiler_->AddPass(new passes::RegisterAllocationPass(
|
||||||
compiler_->AddPass(new passes::ValueReductionPass());
|
backend->machine_info()));
|
||||||
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
if (validate) compiler_->AddPass(new passes::ValidationPass());
|
||||||
|
|
||||||
// Must come last. The HIR is not really HIR after this.
|
// Must come last. The HIR is not really HIR after this.
|
||||||
|
|
|
@ -108,6 +108,9 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
|
||||||
};
|
};
|
||||||
str->Append("v%d.%s", value->ordinal, type_names[value->type]);
|
str->Append("v%d.%s", value->ordinal, type_names[value->type]);
|
||||||
}
|
}
|
||||||
|
if (value->reg.index != -1) {
|
||||||
|
str->Append("<%s%d>", value->reg.set->name, value->reg.index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HIRBuilder::DumpOp(
|
void HIRBuilder::DumpOp(
|
||||||
|
@ -453,6 +456,7 @@ Instr* HIRBuilder::AppendInstr(
|
||||||
if (!block->instr_head) {
|
if (!block->instr_head) {
|
||||||
block->instr_head = instr;
|
block->instr_head = instr;
|
||||||
}
|
}
|
||||||
|
instr->ordinal = -1;
|
||||||
instr->block = block;
|
instr->block = block;
|
||||||
instr->opcode = &opcode_info;
|
instr->opcode = &opcode_info;
|
||||||
instr->flags = flags;
|
instr->flags = flags;
|
||||||
|
@ -477,7 +481,8 @@ Value* HIRBuilder::AllocValue(TypeName type) {
|
||||||
value->last_use = NULL;
|
value->last_use = NULL;
|
||||||
value->local_slot = NULL;
|
value->local_slot = NULL;
|
||||||
value->tag = NULL;
|
value->tag = NULL;
|
||||||
value->reg = -1;
|
value->reg.set = NULL;
|
||||||
|
value->reg.index = -1;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -492,7 +497,8 @@ Value* HIRBuilder::CloneValue(Value* source) {
|
||||||
value->last_use = NULL;
|
value->last_use = NULL;
|
||||||
value->local_slot = NULL;
|
value->local_slot = NULL;
|
||||||
value->tag = NULL;
|
value->tag = NULL;
|
||||||
value->reg = -1;
|
value->reg.set = NULL;
|
||||||
|
value->reg.index = -1;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ public:
|
||||||
|
|
||||||
const OpcodeInfo* opcode;
|
const OpcodeInfo* opcode;
|
||||||
uint16_t flags;
|
uint16_t flags;
|
||||||
uint16_t ordinal;
|
uint32_t ordinal;
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
runtime::FunctionInfo* symbol_info;
|
runtime::FunctionInfo* symbol_info;
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#define ALLOY_HIR_VALUE_H_
|
#define ALLOY_HIR_VALUE_H_
|
||||||
|
|
||||||
#include <alloy/core.h>
|
#include <alloy/core.h>
|
||||||
|
#include <alloy/backend/machine_info.h>
|
||||||
#include <alloy/hir/opcodes.h>
|
#include <alloy/hir/opcodes.h>
|
||||||
|
|
||||||
|
|
||||||
|
@ -90,7 +91,10 @@ public:
|
||||||
TypeName type;
|
TypeName type;
|
||||||
|
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
uint32_t reg;
|
struct {
|
||||||
|
const backend::MachineInfo::RegisterSet* set;
|
||||||
|
int32_t index;
|
||||||
|
} reg;
|
||||||
ConstantValue constant;
|
ConstantValue constant;
|
||||||
|
|
||||||
Instr* def;
|
Instr* def;
|
||||||
|
|
|
@ -145,6 +145,7 @@ typedef XECACHEALIGN volatile void xe_aligned_void_t;
|
||||||
static inline uint32_t XENEXTPOW2(uint32_t v) {
|
static inline uint32_t XENEXTPOW2(uint32_t v) {
|
||||||
v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v;
|
v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v;
|
||||||
}
|
}
|
||||||
|
// Rounds value up to the next multiple of align. align must be a power of
// two. Arguments are parenthesized so that expressions can be passed safely.
#define XEALIGN(value, align) (((value) + (align) - 1) & ~((align) - 1))
|
||||||
|
|
||||||
#define XESUCCEED() goto XECLEANUP
|
#define XESUCCEED() goto XECLEANUP
|
||||||
#define XEFAIL() goto XECLEANUP
|
#define XEFAIL() goto XECLEANUP
|
||||||
|
|
Loading…
Reference in New Issue