From 4a584129d205582ca2ce6ff09dc3410a9f0762b5 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 10 Feb 2014 21:16:38 -0800 Subject: [PATCH] A probably-working register allocator. --- src/alloy/backend/ivm/ivm_assembler.cc | 12 +- src/alloy/backend/ivm/ivm_backend.cc | 4 +- src/alloy/backend/x64/x64_emitter.cc | 6 +- src/alloy/compiler/compiler_passes.h | 40 +- .../passes/control_flow_analysis_pass.cc | 6 +- .../passes/register_allocation_pass.cc | 471 ++++++++++++++++++ .../passes/register_allocation_pass.h | 60 +++ src/alloy/compiler/passes/sources.gypi | 2 + src/alloy/frontend/ppc/ppc_translator.cc | 22 +- src/alloy/hir/hir_builder.cc | 10 +- src/alloy/hir/instr.h | 2 +- src/alloy/hir/value.h | 6 +- src/xenia/types.h | 1 + 13 files changed, 613 insertions(+), 29 deletions(-) create mode 100644 src/alloy/compiler/passes/register_allocation_pass.cc create mode 100644 src/alloy/compiler/passes/register_allocation_pass.h diff --git a/src/alloy/backend/ivm/ivm_assembler.cc b/src/alloy/backend/ivm/ivm_assembler.cc index b869d41ef..ff665b8f3 100644 --- a/src/alloy/backend/ivm/ivm_assembler.cc +++ b/src/alloy/backend/ivm/ivm_assembler.cc @@ -74,15 +74,19 @@ int IVMAssembler::Assemble( builder->ResetLabelTags(); // Function prologue. - size_t stack_size = 0; + size_t stack_offset = 0; auto locals = builder->locals(); for (auto it = locals.begin(); it != locals.end(); ++it) { auto slot = *it; - size_t stack_offset = stack_size; + size_t type_size = GetTypeSize(slot->type); + // Align to natural size. + stack_offset = XEALIGN(stack_offset, type_size); slot->set_constant(stack_offset); - stack_size += GetTypeSize(slot->type); + stack_offset += type_size; } - ctx.stack_size = stack_size; + // Ensure 16b alignment. + stack_offset = XEALIGN(stack_offset, 16); + ctx.stack_size = stack_offset; auto block = builder->first_block(); while (block) { diff --git a/src/alloy/backend/ivm/ivm_backend.cc b/src/alloy/backend/ivm/ivm_backend.cc index 6bd51037f..411d16d30 100644 --- a/src/alloy/backend/ivm/ivm_backend.cc +++ b/src/alloy/backend/ivm/ivm_backend.cc @@ -38,14 +38,14 @@ int IVMBackend::Initialize() { 0, "gpr", MachineInfo::RegisterSet::INT_TYPES, - 10, + 6, }; machine_info_.register_sets[1] = { 1, "vec", MachineInfo::RegisterSet::FLOAT_TYPES | MachineInfo::RegisterSet::VEC_TYPES, - 10, + 6, }; alloy::tracing::WriteEvent(EventType::Init({ diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 02a1aa132..4a1442ca5 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -54,7 +54,7 @@ int X64Emitter::Initialize() { } int X64Emitter::Emit( - HIRBuilder* builder, + HIRBuilder* builder, uint32_t debug_info_flags, runtime::DebugInfo* debug_info, void*& out_code_address, size_t& out_code_size) { // Reset. @@ -98,8 +98,6 @@ void* X64Emitter::Emplace(size_t stack_size) { return new_address; } -#define XEALIGN(value, align) ((value + align - 1) & ~(align - 1)) - int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) { // These are the registers we will not be using. All others are fare game. const uint32_t reserved_regs = @@ -220,7 +218,7 @@ void X64Emitter::ResetRegisters(uint32_t reserved_regs) { if (live_regs & 0x1) { auto v = reg_state_.reg_values[n]; if (v) { - v->reg = -1; + v->reg.index = -1; } } reg_state_.reg_values[n] = 0; diff --git a/src/alloy/compiler/compiler_passes.h b/src/alloy/compiler/compiler_passes.h index ca074e221..20ec91c66 100644 --- a/src/alloy/compiler/compiler_passes.h +++ b/src/alloy/compiler/compiler_passes.h @@ -15,8 +15,9 @@ #include #include #include + //#include #include -//#include +#include #include #include #include @@ -137,5 +138,42 @@ // store_context +302, v5 // branch_true v5, ... // +// - X86Canonicalization +// For various opcodes add copies/commute the arguments to match x86 +// operand semantics. This makes code generation easier and if done +// before register allocation can prevent a lot of extra shuffling in +// the emitted code. +// +// Example: +// : +// v0 = ... +// v1 = ... +// v2 = add v0, v1 <-- v1 now unused +// Becomes: +// v0 = ... +// v1 = ... +// v1 = add v1, v0 <-- src1 = dest/src, so reuse for both +// by commuting and setting dest = src1 +// +// - RegisterAllocation +// Given a machine description (register classes, counts) run over values +// and assign them to registers, adding spills as needed. It should be +// possible to directly emit code from this form. +// +// Example: +// : +// v0 = load_context +0 +// v1 = load_context +1 +// v0 = add v0, v1 +// ... +// v2 = mul v0, v1 +// Becomes: +// reg0 = load_context +0 +// reg1 = load_context +1 +// reg2 = add reg0, reg1 +// store_local +123, reg2 <-- spill inserted +// ... +// reg0 = load_local +123 <-- load inserted +// reg0 = mul reg0, reg1 #endif // ALLOY_COMPILER_COMPILER_PASSES_H_ diff --git a/src/alloy/compiler/passes/control_flow_analysis_pass.cc b/src/alloy/compiler/passes/control_flow_analysis_pass.cc index 5e73bd502..89442bcb6 100644 --- a/src/alloy/compiler/passes/control_flow_analysis_pass.cc +++ b/src/alloy/compiler/passes/control_flow_analysis_pass.cc @@ -41,19 +41,21 @@ int ControlFlowAnalysisPass::Run(HIRBuilder* builder) { // Add edges. auto block = builder->first_block(); while (block) { - auto instr = block->instr_head; + auto instr = block->instr_tail; while (instr) { if (instr->opcode->flags & OPCODE_FLAG_BRANCH) { if (instr->opcode == &OPCODE_BRANCH_info) { auto label = instr->src1.label; builder->AddEdge(block, label->block, Edge::UNCONDITIONAL); + break; } else if (instr->opcode == &OPCODE_BRANCH_TRUE_info || instr->opcode == &OPCODE_BRANCH_FALSE_info) { auto label = instr->src2.label; builder->AddEdge(block, label->block, 0); + break; } } - instr = instr->next; + instr = instr->prev; } block = block->next; } diff --git a/src/alloy/compiler/passes/register_allocation_pass.cc b/src/alloy/compiler/passes/register_allocation_pass.cc new file mode 100644 index 000000000..20b4b021f --- /dev/null +++ b/src/alloy/compiler/passes/register_allocation_pass.cc @@ -0,0 +1,471 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +using namespace alloy; +using namespace alloy::backend; +using namespace alloy::compiler; +using namespace alloy::compiler::passes; +using namespace alloy::hir; + + +struct RegisterAllocationPass::Interval { + uint32_t start_ordinal; + uint32_t end_ordinal; + Value* value; + RegisterFreeUntilSet* free_until_set; + // TODO(benvanik): reduce to offsets in arena? + struct Interval* next; + struct Interval* prev; + + void AddToList(Interval** list_head) { + auto list_next = *list_head; + this->next = list_next; + if (list_next) { + list_next->prev = this; + } + *list_head = this; + } + + void InsertIntoList(Interval** list_head) { + auto it = *list_head; + while (it) { + if (it->start_ordinal > this->start_ordinal) { + // Went too far. Insert before this interval. + this->prev = it->prev; + this->next = it; + if (it->prev) { + it->prev->next = this; + } else { + *list_head = this; + } + it->prev = this; + return; + } + if (!it->next) { + // None found, add at tail. + it->next = this; + this->prev = it; + return; + } + it = it->next; + } + } + + void RemoveFromList(Interval** list_head) { + if (this->next) { + this->next->prev = this->prev; + } + if (this->prev) { + this->prev->next = this->next; + } else { + *list_head = this->next; + } + this->next = this->prev = NULL; + } +}; + +struct RegisterAllocationPass::Intervals { + Interval* unhandled; + Interval* active; + Interval* handled; +}; + +RegisterAllocationPass::RegisterAllocationPass( + const MachineInfo* machine_info) : + machine_info_(machine_info), + CompilerPass() { + // Initialize register sets. The values of these will be + // cleared before use, so just the structure is required. + auto mi_sets = machine_info->register_sets; + xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_)); + uint32_t n = 0; + while (mi_sets[n].count) { + auto& mi_set = mi_sets[n]; + auto free_until_set = new RegisterFreeUntilSet(); + free_until_sets_.all_sets[n] = free_until_set; + free_until_set->count = mi_set.count; + free_until_set->set = &mi_set; + if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) { + free_until_sets_.int_set = free_until_set; + } + if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) { + free_until_sets_.float_set = free_until_set; + } + if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) { + free_until_sets_.vec_set = free_until_set; + } + n++; + } +} + +RegisterAllocationPass::~RegisterAllocationPass() { + for (size_t n = 0; n < XECOUNT(free_until_sets_.all_sets); n++) { + if (!free_until_sets_.all_sets[n]) { + break; + } + delete free_until_sets_.all_sets[n]; + } +} + +int RegisterAllocationPass::Run(HIRBuilder* builder) { + // A (probably broken) implementation of a linear scan register allocator + // that operates directly on SSA form: + // http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf + // + // Requirements: + // - SSA form (single definition for variables) + // - block should be in linear order: + // - dominators *should* come before (a->b->c) + // - loop block sequences *should not* have intervening non-loop blocks + + auto arena = scratch_arena(); + + // Renumber everything. + uint32_t block_ordinal = 0; + uint32_t instr_ordinal = 0; + auto block = builder->first_block(); + while (block) { + // Sequential block ordinals. + block->ordinal = block_ordinal++; + auto instr = block->instr_head; + while (instr) { + // Sequential global instruction ordinals. + instr->ordinal = instr_ordinal++; + instr = instr->next; + } + block = block->next; + } + + // Compute all liveness ranges by walking forward through all + // blocks/instructions and checking the last use of each value. This lets + // us know the exact order in (block#,instr#) form, which is then used to + // setup the range. + // TODO(benvanik): ideally we would have a list of all values and not have + // to keep walking instructions over and over. + Interval* prev_interval = NULL; + Interval* head_interval = NULL; + block = builder->first_block(); + while (block) { + auto instr = block->instr_head; + while (instr) { + // Compute last-use for the dest value. + // Since we know all values of importance must be defined, we can avoid + // having to check every value and just look at dest. + const OpcodeInfo* info = instr->opcode; + if (GET_OPCODE_SIG_TYPE_DEST(info->signature) == OPCODE_SIG_TYPE_V) { + auto v = instr->dest; + if (!v->last_use) { + ComputeLastUse(v); + } + + // Add interval. + auto interval = arena->Alloc(); + interval->start_ordinal = instr->ordinal; + interval->end_ordinal = v->last_use ? + v->last_use->ordinal : v->def->ordinal; + interval->value = v; + interval->next = NULL; + interval->prev = prev_interval; + if (prev_interval) { + prev_interval->next = interval; + } else { + head_interval = interval; + } + prev_interval = interval; + + // Grab register set to use. + // We do this now so it's only once per interval, and it makes it easy + // to only compare intervals that overlap their sets. + if (v->type <= INT64_TYPE) { + interval->free_until_set = free_until_sets_.int_set; + } else if (v->type <= FLOAT64_TYPE) { + interval->free_until_set = free_until_sets_.float_set; + } else { + interval->free_until_set = free_until_sets_.vec_set; + } + } + + instr = instr->next; + } + block = block->next; + } + + // Now have a sorted list of intervals, minus their ending ordinals. + Intervals intervals; + intervals.unhandled = head_interval; + intervals.active = intervals.handled = NULL; + while (intervals.unhandled) { + // Get next unhandled interval. + auto current = intervals.unhandled; + intervals.unhandled = intervals.unhandled->next; + current->RemoveFromList(&intervals.unhandled); + + // Check for intervals in active that are handled or inactive. + auto it = intervals.active; + while (it) { + auto next = it->next; + if (it->end_ordinal <= current->start_ordinal) { + // Move from active to handled. + it->RemoveFromList(&intervals.active); + it->AddToList(&intervals.handled); + } + it = next; + } + + // Find a register for current. + if (!TryAllocateFreeReg(current, intervals)) { + // Failed, spill. + AllocateBlockedReg(builder, current, intervals); + } + + if (current->value->reg.index!= -1) { + // Add current to active. + current->AddToList(&intervals.active); + } + } + + return 0; +} + +void RegisterAllocationPass::ComputeLastUse(Value* value) { + // TODO(benvanik): compute during construction? + // Note that this list isn't sorted (unfortunately), so we have to scan + // them all. + uint32_t max_ordinal = 0; + Value::Use* last_use = NULL; + auto use = value->use_head; + while (use) { + if (!last_use || use->instr->ordinal >= max_ordinal) { + last_use = use; + max_ordinal = use->instr->ordinal; + } + use = use->next; + } + value->last_use = last_use ? last_use->instr : NULL; +} + +bool RegisterAllocationPass::TryAllocateFreeReg( + Interval* current, Intervals& intervals) { + // Reset all registers in the set to unused. + auto free_until_set = current->free_until_set; + for (uint32_t n = 0; n < free_until_set->count; n++) { + free_until_set->pos[n] = -1; + } + + // Mark all active registers as used. + // TODO(benvanik): keep some kind of bitvector so that this is instant? + auto it = intervals.active; + while (it) { + if (it->free_until_set == free_until_set) { + free_until_set->pos[it->value->reg.index] = 0; + } + it = it->next; + } + + uint32_t max_pos = 0; + for (uint32_t n = 0; n < free_until_set->count; n++) { + if (max_pos == -1) { + max_pos = n; + } else { + if (free_until_set->pos[n] > free_until_set->pos[max_pos]) { + max_pos = n; + } + } + } + if (!free_until_set->pos[max_pos]) { + // No register available without spilling. + return false; + } + if (current->end_ordinal < free_until_set->pos[max_pos]) { + // Register available for the whole interval. + current->value->reg.set = free_until_set->set; + current->value->reg.index = max_pos; + } else { + // Register available for the first part of the interval. + // Split the interval at where it hits the next one. + //current->value->reg = max_pos; + //SplitRange(current, free_until_set->pos[max_pos]); + // TODO(benvanik): actually split -- for now we just spill. + return false; + } + + return true; +} + +void RegisterAllocationPass::AllocateBlockedReg( + HIRBuilder* builder, Interval* current, Intervals& intervals) { + auto free_until_set = current->free_until_set; + + // TODO(benvanik): smart heuristics. + // wimmer AllocateBlockedReg has some stuff for deciding whether to + // spill current or some other active interval - which we ignore. + + // Pick a random interval. Maybe the first. Sure. + auto spill_interval = intervals.active; + Value* spill_value = NULL; + Instr* prev_use = NULL; + Instr* next_use = NULL; + while (spill_interval) { + if (spill_interval->free_until_set != free_until_set || + spill_interval->start_ordinal == current->start_ordinal) { + // Only interested in ones of the same register set. + // We also ensure that ones at the same ordinal as us are ignored, + // which can happen with multiple local inserts/etc. + spill_interval = spill_interval->next; + continue; + } + spill_value = spill_interval->value; + + // Find the uses right before/after current. + auto use = spill_value->use_head; + while (use) { + if (use->instr->ordinal != -1) { + if (use->instr->ordinal < current->start_ordinal) { + if (!prev_use || prev_use->ordinal < use->instr->ordinal) { + prev_use = use->instr; + } + } else if (use->instr->ordinal > current->start_ordinal) { + if (!next_use || next_use->ordinal > use->instr->ordinal) { + next_use = use->instr; + } + } + } + use = use->next; + } + if (!prev_use) { + prev_use = spill_value->def; + } + if (prev_use->next == next_use) { + // Uh, this interval is way too short. + spill_interval = spill_interval->next; + continue; + } + XEASSERT(prev_use->ordinal != -1); + XEASSERTNOTNULL(next_use); + break; + } + XEASSERT(spill_interval->free_until_set == free_until_set); + + // Find the real last use -- paired ops may require sequences to stay + // intact. This is a bad design. + auto prev_def_tail = prev_use; + while (prev_def_tail && + prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV) { + prev_def_tail = prev_def_tail->prev; + } + + Value* new_value; + uint32_t end_ordinal; + if (spill_value->local_slot) { + // Value is already assigned a slot, so load from that. + // We can then split the interval right after the previous use to + // before the next use. + + // Update the last use of the spilled interval/value. + end_ordinal = spill_interval->end_ordinal; + spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal; + XEASSERT(end_ordinal != -1); + XEASSERT(spill_interval->end_ordinal != -1); + + // Insert a load right before the next use. + new_value = builder->LoadLocal(spill_value->local_slot); + builder->last_instr()->MoveBefore(next_use); + + // Update last use info. + new_value->last_use = spill_value->last_use; + spill_value->last_use = prev_use; + } else { + // Allocate a local slot. + spill_value->local_slot = builder->AllocLocal(spill_value->type); + + // Insert a spill right after the def. + builder->StoreLocal(spill_value->local_slot, spill_value); + auto spill_store = builder->last_instr(); + spill_store->MoveBefore(prev_def_tail->next); + + // Update last use of spilled interval/value. + end_ordinal = spill_interval->end_ordinal; + spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal; + XEASSERT(end_ordinal != -1); + XEASSERT(spill_interval->end_ordinal != -1); + + // Insert a load right before the next use. + new_value = builder->LoadLocal(spill_value->local_slot); + builder->last_instr()->MoveBefore(next_use); + + // Update last use info. + new_value->last_use = spill_value->last_use; + spill_value->last_use = spill_store; + } + + // Reuse the same local slot. Hooray SSA. + new_value->local_slot = spill_value->local_slot; + + // Rename all future uses to that loaded value. + auto use = spill_value->use_head; + while (use) { + // TODO(benvanik): keep use list sorted so we don't have to do this. + if (use->instr->ordinal <= spill_interval->end_ordinal || + use->instr->ordinal == -1) { + use = use->next; + continue; + } + auto next = use->next; + auto instr = use->instr; + uint32_t signature = instr->opcode->signature; + if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) { + if (instr->src1.value == spill_value) { + instr->set_src1(new_value); + } + } + if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) { + if (instr->src2.value == spill_value) { + instr->set_src2(new_value); + } + } + if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) { + if (instr->src3.value == spill_value) { + instr->set_src3(new_value); + } + } + use = next; + } + + // Create new interval. + auto arena = scratch_arena(); + auto new_interval = arena->Alloc(); + new_interval->start_ordinal = new_value->def->ordinal; + new_interval->end_ordinal = end_ordinal; + new_interval->value = new_value; + new_interval->next = NULL; + new_interval->prev = NULL; + if (new_value->type <= INT64_TYPE) { + new_interval->free_until_set = free_until_sets_.int_set; + } else if (new_value->type <= FLOAT64_TYPE) { + new_interval->free_until_set = free_until_sets_.float_set; + } else { + new_interval->free_until_set = free_until_sets_.vec_set; + } + + // Remove the old interval from the active list, as it's been spilled. + spill_interval->RemoveFromList(&intervals.active); + spill_interval->AddToList(&intervals.handled); + + // Insert interval into the right place in the list. + // We know it's ahead of us. + new_interval->InsertIntoList(&intervals.unhandled); + + // TODO(benvanik): use the register we just freed? + //current->value->reg.set = free_until_set->set; + //current->value->reg.index = spill_interval->value->reg.index; + bool allocated = TryAllocateFreeReg(current, intervals); + XEASSERTTRUE(allocated); +} diff --git a/src/alloy/compiler/passes/register_allocation_pass.h b/src/alloy/compiler/passes/register_allocation_pass.h new file mode 100644 index 000000000..3167000ec --- /dev/null +++ b/src/alloy/compiler/passes/register_allocation_pass.h @@ -0,0 +1,60 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ +#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ + +#include +#include + + +namespace alloy { +namespace compiler { +namespace passes { + + +class RegisterAllocationPass : public CompilerPass { +public: + RegisterAllocationPass(const backend::MachineInfo* machine_info); + virtual ~RegisterAllocationPass(); + + virtual int Run(hir::HIRBuilder* builder); + +private: + struct Interval; + struct Intervals; + void ComputeLastUse(hir::Value* value); + bool TryAllocateFreeReg(Interval* current, Intervals& intervals); + void AllocateBlockedReg(hir::HIRBuilder* builder, + Interval* current, Intervals& intervals); + +private: + const backend::MachineInfo* machine_info_; + + struct RegisterFreeUntilSet { + uint32_t count; + uint32_t pos[32]; + const backend::MachineInfo::RegisterSet* set; + }; + struct RegisterFreeUntilSets { + RegisterFreeUntilSet* int_set; + RegisterFreeUntilSet* float_set; + RegisterFreeUntilSet* vec_set; + RegisterFreeUntilSet* all_sets[3]; + }; + RegisterFreeUntilSets free_until_sets_; +}; + + +} // namespace passes +} // namespace compiler +} // namespace alloy + + +#endif // ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ diff --git a/src/alloy/compiler/passes/sources.gypi b/src/alloy/compiler/passes/sources.gypi index bd5559319..ed16920ad 100644 --- a/src/alloy/compiler/passes/sources.gypi +++ b/src/alloy/compiler/passes/sources.gypi @@ -15,6 +15,8 @@ 'finalization_pass.h', #'dead_store_elimination_pass.cc', #'dead_store_elimination_pass.h', + 'register_allocation_pass.cc', + 'register_allocation_pass.h', 'simplification_pass.cc', 'simplification_pass.h', 'validation_pass.cc', diff --git a/src/alloy/frontend/ppc/ppc_translator.cc b/src/alloy/frontend/ppc/ppc_translator.cc index 2431f1761..61617db33 100644 --- a/src/alloy/frontend/ppc/ppc_translator.cc +++ b/src/alloy/frontend/ppc/ppc_translator.cc @@ -46,7 +46,7 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : // Passes are executed in the order they are added. Multiple of the same // pass type may be used. if (validate) compiler_->AddPass(new passes::ValidationPass()); - //compiler_->AddPass(new passes::ContextPromotionPass()); + compiler_->AddPass(new passes::ContextPromotionPass()); if (validate) compiler_->AddPass(new passes::ValidationPass()); compiler_->AddPass(new passes::SimplificationPass()); if (validate) compiler_->AddPass(new passes::ValidationPass()); @@ -59,18 +59,16 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : compiler_->AddPass(new passes::DeadCodeEliminationPass()); if (validate) compiler_->AddPass(new passes::ValidationPass()); - // Adds local load/stores. - compiler_->AddPass(new passes::DataFlowAnalysisPass()); - if (validate) compiler_->AddPass(new passes::ValidationPass()); - compiler_->AddPass(new passes::SimplificationPass()); - if (validate) compiler_->AddPass(new passes::ValidationPass()); + //// Removes all unneeded variables. Try not to add new ones after this. + //compiler_->AddPass(new passes::ValueReductionPass()); + //if (validate) compiler_->AddPass(new passes::ValidationPass()); - // Run DCE one more time to cleanup any local manipulation. - compiler_->AddPass(new passes::DeadCodeEliminationPass()); - if (validate) compiler_->AddPass(new passes::ValidationPass()); - - // Removes all unneeded variables. Try not to add new ones after this. - compiler_->AddPass(new passes::ValueReductionPass()); + // Register allocation for the target backend. + // Will modify the HIR to add loads/stores. + // This should be the last pass before finalization, as after this all + // registers are assigned and ready to be emitted. + compiler_->AddPass(new passes::RegisterAllocationPass( + backend->machine_info())); if (validate) compiler_->AddPass(new passes::ValidationPass()); // Must come last. The HIR is not really HIR after this. diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index 5e0be6dad..cad24c32c 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -108,6 +108,9 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) { }; str->Append("v%d.%s", value->ordinal, type_names[value->type]); } + if (value->reg.index != -1) { + str->Append("<%s%d>", value->reg.set->name, value->reg.index); + } } void HIRBuilder::DumpOp( @@ -453,6 +456,7 @@ Instr* HIRBuilder::AppendInstr( if (!block->instr_head) { block->instr_head = instr; } + instr->ordinal = -1; instr->block = block; instr->opcode = &opcode_info; instr->flags = flags; @@ -477,7 +481,8 @@ Value* HIRBuilder::AllocValue(TypeName type) { value->last_use = NULL; value->local_slot = NULL; value->tag = NULL; - value->reg = -1; + value->reg.set = NULL; + value->reg.index = -1; return value; } @@ -492,7 +497,8 @@ Value* HIRBuilder::CloneValue(Value* source) { value->last_use = NULL; value->local_slot = NULL; value->tag = NULL; - value->reg = -1; + value->reg.set = NULL; + value->reg.index = -1; return value; } diff --git a/src/alloy/hir/instr.h b/src/alloy/hir/instr.h index 57effa650..62983401d 100644 --- a/src/alloy/hir/instr.h +++ b/src/alloy/hir/instr.h @@ -52,7 +52,7 @@ public: const OpcodeInfo* opcode; uint16_t flags; - uint16_t ordinal; + uint32_t ordinal; typedef union { runtime::FunctionInfo* symbol_info; diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h index 4fa957932..c2c8ed7ae 100644 --- a/src/alloy/hir/value.h +++ b/src/alloy/hir/value.h @@ -11,6 +11,7 @@ #define ALLOY_HIR_VALUE_H_ #include +#include #include @@ -90,7 +91,10 @@ public: TypeName type; uint32_t flags; - uint32_t reg; + struct { + const backend::MachineInfo::RegisterSet* set; + int32_t index; + } reg; ConstantValue constant; Instr* def; diff --git a/src/xenia/types.h b/src/xenia/types.h index f4356e94a..928c71766 100644 --- a/src/xenia/types.h +++ b/src/xenia/types.h @@ -145,6 +145,7 @@ typedef XECACHEALIGN volatile void xe_aligned_void_t; static inline uint32_t XENEXTPOW2(uint32_t v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v; } +#define XEALIGN(value, align) ((value + align - 1) & ~(align - 1)) #define XESUCCEED() goto XECLEANUP #define XEFAIL() goto XECLEANUP