Trying out a new style of JIT pattern matching.

This commit is contained in:
Ben Vanik 2014-05-26 20:28:21 -07:00
parent a001714fb0
commit 5a85263e5f
38 changed files with 6403 additions and 5160 deletions

View File

@ -40,10 +40,10 @@ namespace ivm {
#define DPRINT #define DPRINT
#define DFLUSH() #define DFLUSH()
//#define IPRINT if (ics.thread_state->thread_id() == 1) printf #define IPRINT if (ics.thread_state->thread_id() == 1) printf
//#define IFLUSH() fflush(stdout) #define IFLUSH() fflush(stdout)
//#define DPRINT if (ics.thread_state->thread_id() == 1) printf #define DPRINT if (ics.thread_state->thread_id() == 1) printf
//#define DFLUSH() fflush(stdout) #define DFLUSH() fflush(stdout)
#if XE_CPU_BIGENDIAN #if XE_CPU_BIGENDIAN
#define VECB16(v,n) (v.b16[n]) #define VECB16(v,n) (v.b16[n])
@ -1364,31 +1364,31 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
} }
uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32));
return IA_NEXT; return IA_NEXT;
} }
int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) { int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
@ -1405,31 +1405,31 @@ int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
} }
uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
*((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8; *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i8;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
*((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16; *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i16;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
*((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32; *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
*((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64; *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
*((float*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32; *((float*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
*((double*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64; *((double*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
*((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].v128; *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].v128;
return IA_NEXT; return IA_NEXT;
} }
int Translate_STORE_LOCAL(TranslationContext& ctx, Instr* i) { int Translate_STORE_LOCAL(TranslationContext& ctx, Instr* i) {
@ -3715,17 +3715,17 @@ int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i64); ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i8);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i64); ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i8);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i64); ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i8);
return IA_NEXT; return IA_NEXT;
} }
int Translate_EXTRACT(TranslationContext& ctx, Instr* i) { int Translate_EXTRACT(TranslationContext& ctx, Instr* i) {

File diff suppressed because it is too large Load Diff

View File

@ -1,71 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/backend/x64/lowering/lowering_sequences.h>
using namespace alloy;
using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
// Creates a table bound to |backend| and zeroes the whole lookup_ array so
// that no opcode has any sequences registered yet.
LoweringTable::LoweringTable(X64Backend* backend) :
backend_(backend) {
xe_zero_struct(lookup_, sizeof(lookup_));
}
// Frees every sequence entry allocated by AddSequence by walking the linked
// list stored in each lookup_ slot.
LoweringTable::~LoweringTable() {
  for (size_t slot = 0; slot < XECOUNT(lookup_); ++slot) {
    for (auto node = lookup_[slot]; node;) {
      // Grab the successor before the node is destroyed.
      auto successor = node->next;
      delete node;
      node = successor;
    }
  }
}
// Populates the table by handing it to RegisterSequences.
// Always returns 0.
int LoweringTable::Initialize() {
RegisterSequences(this);
return 0;
}
// Registers |fn| as a candidate sequence for instructions that start with
// |starting_opcode|. The new entry is pushed onto the front of that opcode's
// list, so ProcessBlock tries the most recently added sequence first.
void LoweringTable::AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn) {
  auto head = new sequence_fn_entry_t();
  head->fn = fn;
  head->next = lookup_[starting_opcode];
  lookup_[starting_opcode] = head;
}
// Lowers every instruction in |block| by dispatching to the sequences
// registered for each instruction's opcode.
//
// Returns 0 when the whole block was processed, or 1 as soon as an
// instruction is reached that no registered sequence accepts.
int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) {
  auto instr = block->instr_head;
  while (instr) {
    // Try the candidates for this opcode in list order. Sequence functions
    // receive |instr| by reference; this loop never advances it itself, so a
    // sequence that returns true is presumably responsible for moving
    // |instr| past whatever it consumed.
    bool processed = false;
    for (auto entry = lookup_[instr->opcode->num]; entry;
         entry = entry->next) {
      if ((*entry->fn)(e, instr)) {
        processed = true;
        break;
      }
    }
    if (!processed) {
      // No sequence found!
      XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
      return 1;
    }
  }
  return 0;
}

View File

@ -1,58 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
#include <alloy/core.h>
#include <alloy/hir/hir_builder.h>
namespace alloy {
namespace backend {
namespace x64 {
class X64Backend;
class X64Emitter;
namespace lowering {
// Table mapping a HIR opcode to a linked list of candidate lowering
// sequences for the x64 backend.
class LoweringTable {
public:
LoweringTable(X64Backend* backend);
~LoweringTable();
// Registers all sequences into this table.
int Initialize();
// Runs the registered sequences over every instruction in |block|.
// Returns 0 on success, nonzero if an instruction could not be processed.
int ProcessBlock(X64Emitter& e, hir::Block* block);
public:
// A lowering sequence. Returns true if it handled the instruction(s)
// starting at |instr|; |instr| is passed by reference so the sequence can
// update the caller's cursor.
typedef bool(*sequence_fn_t)(X64Emitter& e, hir::Instr*& instr);
// Adds |fn| as a candidate for instructions beginning with
// |starting_opcode|.
void AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn);
private:
// Singly-linked list node holding one registered sequence.
class sequence_fn_entry_t {
public:
sequence_fn_t fn;
sequence_fn_entry_t* next;
};
// NOTE: this class is shared by multiple threads and is not thread safe.
// Do not modify anything after init.
X64Backend* backend_;
// One list head per opcode, indexed by opcode number.
sequence_fn_entry_t* lookup_[hir::__OPCODE_MAX_VALUE];
};
} // namespace lowering
} // namespace x64
} // namespace backend
} // namespace alloy
#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +0,0 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
'sources': [
'lowering_sequences.cc',
'lowering_sequences.h',
'lowering_table.cc',
'lowering_table.h',
'op_utils.inl',
'tracers.cc',
'tracers.h',
],
}

View File

@ -12,11 +12,12 @@
'x64_emitter.h', 'x64_emitter.h',
'x64_function.cc', 'x64_function.cc',
'x64_function.h', 'x64_function.h',
'x64_sequence.inl',
'x64_sequences.cc',
'x64_sequences.h',
'x64_thunk_emitter.cc', 'x64_thunk_emitter.cc',
'x64_thunk_emitter.h', 'x64_thunk_emitter.h',
], 'x64_tracers.cc',
'x64_tracers.h',
'includes': [
'lowering/sources.gypi',
], ],
} }

View File

@ -12,26 +12,23 @@
#include <alloy/backend/x64/tracing.h> #include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_assembler.h> #include <alloy/backend/x64/x64_assembler.h>
#include <alloy/backend/x64/x64_code_cache.h> #include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/x64_sequences.h>
#include <alloy/backend/x64/x64_thunk_emitter.h> #include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/lowering/lowering_sequences.h>
using namespace alloy; using namespace alloy;
using namespace alloy::backend; using namespace alloy::backend;
using namespace alloy::backend::x64; using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
using namespace alloy::runtime; using namespace alloy::runtime;
X64Backend::X64Backend(Runtime* runtime) : X64Backend::X64Backend(Runtime* runtime) :
code_cache_(0), lowering_table_(0), code_cache_(0),
Backend(runtime) { Backend(runtime) {
} }
X64Backend::~X64Backend() { X64Backend::~X64Backend() {
alloy::tracing::WriteEvent(EventType::Deinit({ alloy::tracing::WriteEvent(EventType::Deinit({
})); }));
delete lowering_table_;
delete code_cache_; delete code_cache_;
} }
@ -41,6 +38,8 @@ int X64Backend::Initialize() {
return result; return result;
} }
RegisterSequences();
machine_info_.register_sets[0] = { machine_info_.register_sets[0] = {
0, 0,
"gpr", "gpr",
@ -68,9 +67,6 @@ int X64Backend::Initialize() {
delete thunk_emitter; delete thunk_emitter;
delete allocator; delete allocator;
lowering_table_ = new LoweringTable(this);
RegisterSequences(lowering_table_);
alloy::tracing::WriteEvent(EventType::Init({ alloy::tracing::WriteEvent(EventType::Init({
})); }));

View File

@ -20,7 +20,6 @@ namespace backend {
namespace x64 { namespace x64 {
class X64CodeCache; class X64CodeCache;
namespace lowering { class LoweringTable; }
#define ALLOY_HAS_X64_BACKEND 1 #define ALLOY_HAS_X64_BACKEND 1
@ -38,8 +37,6 @@ public:
HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; } HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; } GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
lowering::LoweringTable* lowering_table() const { return lowering_table_; }
virtual int Initialize(); virtual int Initialize();
virtual Assembler* CreateAssembler(); virtual Assembler* CreateAssembler();
@ -48,8 +45,6 @@ private:
X64CodeCache* code_cache_; X64CodeCache* code_cache_;
HostToGuestThunk host_to_guest_thunk_; HostToGuestThunk host_to_guest_thunk_;
GuestToHostThunk guest_to_host_thunk_; GuestToHostThunk guest_to_host_thunk_;
lowering::LoweringTable* lowering_table_;
}; };

View File

@ -11,10 +11,14 @@
#include <alloy/backend/x64/x64_backend.h> #include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_code_cache.h> #include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/x64_function.h>
#include <alloy/backend/x64/x64_sequences.h>
#include <alloy/backend/x64/x64_thunk_emitter.h> #include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/hir/hir_builder.h> #include <alloy/hir/hir_builder.h>
#include <alloy/runtime/debug_info.h> #include <alloy/runtime/debug_info.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/symbol_info.h>
#include <alloy/runtime/thread_state.h>
using namespace alloy; using namespace alloy;
using namespace alloy::backend; using namespace alloy::backend;
@ -31,6 +35,13 @@ namespace x64 {
static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024; static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
static const size_t STASH_OFFSET = 32;
// If we are running with tracing on we have to store the EFLAGS in the stack,
// otherwise our calls out to C to print will clear it before DID_CARRY/etc
// can get the value.
#define STORE_EFLAGS 1
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace alloy } // namespace alloy
@ -145,12 +156,9 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx); mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx); mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0); mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
// ReloadRDX:
mov(rdx, qword[rcx + 8]); // membase mov(rdx, qword[rcx + 8]); // membase
} }
auto lowering_table = backend_->lowering_table();
// Body. // Body.
auto block = builder->first_block(); auto block = builder->first_block();
while (block) { while (block) {
@ -161,12 +169,17 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
label = label->next; label = label->next;
} }
// Add instructions. // Process instructions.
// The table will process sequences of instructions to (try to) const Instr* instr = block->instr_head;
// generate optimal code. while (instr) {
current_instr_ = block->instr_head; const Instr* new_tail = instr;
if (lowering_table->ProcessBlock(*this, block)) { if (!SelectSequence(*this, instr, &new_tail)) {
return 1; // No sequence found!
XEASSERTALWAYS();
XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
break;
}
instr = new_tail;
} }
block = block->next; block = block->next;
@ -191,16 +204,320 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
return 0; return 0;
} }
Instr* X64Emitter::Advance(Instr* i) { void X64Emitter::MarkSourceOffset(const Instr* i) {
auto next = i->next;
current_instr_ = next;
return next;
}
void X64Emitter::MarkSourceOffset(Instr* i) {
auto entry = source_map_arena_.Alloc<SourceMapEntry>(); auto entry = source_map_arena_.Alloc<SourceMapEntry>();
entry->source_offset = i->src1.offset; entry->source_offset = i->src1.offset;
entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal; entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
entry->code_offset = getSize(); entry->code_offset = getSize();
source_map_count_++; source_map_count_++;
} }
// Emits a single 0xCC byte (the x86 INT3 breakpoint opcode) into the
// generated code.
void X64Emitter::DebugBreak() {
// TODO(benvanik): notify debugger.
db(0xCC);
}
// Emits a single 0xCC byte (the x86 INT3 breakpoint opcode) into the
// generated code. Currently emits exactly what DebugBreak emits.
void X64Emitter::Trap() {
// TODO(benvanik): notify debugger.
db(0xCC);
}
// Called for a HIR instruction that has no lowering. Emits a 0xCC (INT3)
// byte into the generated code and then asserts on the host while code is
// being emitted. |i| is currently unused.
void X64Emitter::UnimplementedInstr(const hir::Instr* i) {
// TODO(benvanik): notify debugger.
db(0xCC);
XEASSERTALWAYS();
}
// Native helper invoked from generated code (see X64Emitter::Call).
// |raw_context| points at storage whose first pointer-sized field is the
// ThreadState*; |symbol_info_ptr| is a FunctionInfo* passed as an integer.
// Resolves the function at the symbol's address and returns the address of
// its machine code.
uint64_t ResolveFunctionSymbol(void* raw_context, uint64_t symbol_info_ptr) {
  // TODO(benvanik): generate this thunk at runtime? or a shim?
  ThreadState* thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  FunctionInfo* symbol_info =
      reinterpret_cast<FunctionInfo*>(symbol_info_ptr);

  Function* resolved = NULL;
  thread_state->runtime()->ResolveFunction(symbol_info->address(), &resolved);
  XEASSERTNOTNULL(resolved);

  return reinterpret_cast<uint64_t>(
      static_cast<X64Function*>(resolved)->machine_code());
}
// Emits a direct call (or tail jump) to the function described by
// |symbol_info|. The target address is placed in rax: as an immediate when
// the function object already exists, otherwise by emitting a call to
// ResolveFunctionSymbol.
void X64Emitter::Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info) {
  // Resolve address to the function to call and store in rax.
  // TODO(benvanik): caching/etc. For now this makes debugging easier.
  auto fn = reinterpret_cast<X64Function*>(symbol_info->function());
  if (!fn) {
    CallNative(ResolveFunctionSymbol, reinterpret_cast<uint64_t>(symbol_info));
  } else {
    mov(rax, reinterpret_cast<uint64_t>(fn->machine_code()));
  }

  // Actually jump/call to rax.
  const bool is_tail_call = (instr->flags & CALL_TAIL) != 0;
  if (!is_tail_call) {
    // Return address is from the previous SET_RETURN_ADDRESS.
    mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
    call(rax);
    return;
  }

  // Tail call: pass the caller's return address over, release this
  // function's stack allocation and jump.
  mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
  add(rsp, static_cast<uint32_t>(stack_size()));
  jmp(rax);
}
// Native helper invoked from generated code (see X64Emitter::CallIndirect).
// |raw_context| points at storage whose first pointer-sized field is the
// ThreadState*. Only the low 32 bits of |target_address| are used. Resolves
// the function at that address and returns the address of its machine code.
uint64_t ResolveFunctionAddress(void* raw_context, uint64_t target_address) {
  // TODO(benvanik): generate this thunk at runtime? or a shim?
  ThreadState* thread_state = *reinterpret_cast<ThreadState**>(raw_context);

  // TODO(benvanik): required?
  target_address &= 0xFFFFFFFF;

  Function* resolved = NULL;
  thread_state->runtime()->ResolveFunction(target_address, &resolved);
  XEASSERTNOTNULL(resolved);

  return reinterpret_cast<uint64_t>(
      static_cast<X64Function*>(resolved)->machine_code());
}
// Emits an indirect call (or tail jump) to the target address held in |reg|.
// When the instruction is flagged CALL_POSSIBLE_RETURN, a compare against
// the stored return address is emitted first, with a near jump to the
// "epilog" label on a match.
void X64Emitter::CallIndirect(const hir::Instr* instr, const Reg64& reg) {
  const bool may_be_return = (instr->flags & CALL_POSSIBLE_RETURN) != 0;
  const bool is_tail_call = (instr->flags & CALL_TAIL) != 0;

  // Check if return.
  if (may_be_return) {
    cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
    je("epilog", CodeGenerator::T_NEAR);
  }

  // Resolve address to the function to call and store in rax.
  // TODO(benvanik): caching/etc. For now this makes debugging easier.
  // The target is handed to the resolver in rdx; skip the move when it is
  // already there.
  const bool target_in_rdx = reg.getIdx() == rdx.getIdx();
  if (!target_in_rdx) {
    mov(rdx, reg);
  }
  CallNative(ResolveFunctionAddress);

  // Actually jump/call to rax.
  if (is_tail_call) {
    // Pass the callers return address over.
    mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
    add(rsp, static_cast<uint32_t>(stack_size()));
    jmp(rax);
    return;
  }

  // Return address is from the previous SET_RETURN_ADDRESS.
  mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
  call(rax);
}
// Native helper emitted in place of an extern call that has no handler.
// Logs a warning with the symbol's address and name, then returns 0.
// |raw_context| is unused.
uint64_t UndefinedCallExtern(void* raw_context, uint64_t symbol_info_ptr) {
  FunctionInfo* info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
  XELOGW("undefined extern call to %.8X %s", info->address(), info->name());
  return 0;
}
// Emits a call to the host implementation of an extern function. When the
// symbol has a handler, it is invoked through the backend's guest-to-host
// thunk; otherwise a call to UndefinedCallExtern is emitted instead.
void X64Emitter::CallExtern(const hir::Instr* instr, const FunctionInfo* symbol_info) {
  XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN);

  if (symbol_info->extern_handler()) {
    // rcx = context
    // rdx = target host function
    // r8  = arg0
    // r9  = arg1
    mov(rdx, reinterpret_cast<uint64_t>(symbol_info->extern_handler()));
    mov(r8, reinterpret_cast<uint64_t>(symbol_info->extern_arg0()));
    mov(r9, reinterpret_cast<uint64_t>(symbol_info->extern_arg1()));
    auto thunk = backend()->guest_to_host_thunk();
    mov(rax, reinterpret_cast<uint64_t>(thunk));
    call(rax);
    // Restore rcx/rdx after the host call.
    ReloadECX();
    ReloadEDX();
    // rax = host return
    return;
  }

  CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(symbol_info));
}
// Emits a call to the native function at |fn| through rax, then re-emits
// the loads of rcx and rdx (ReloadECX/ReloadEDX) after the call.
void X64Emitter::CallNative(void* fn) {
  mov(rax, reinterpret_cast<uint64_t>(fn));
  call(rax);
  ReloadECX();
  ReloadEDX();
}

// Typed overload for helpers taking only the context; emits the same code as
// the void* overload.
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context)) {
  CallNative(reinterpret_cast<void*>(fn));
}

// Typed overload for helpers taking the context plus one argument. The
// caller must already have placed the argument in rdx.
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)) {
  CallNative(reinterpret_cast<void*>(fn));
}

// As above, but first loads the immediate |arg0| into rdx.
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) {
  mov(rdx, arg0);
  CallNative(reinterpret_cast<void*>(fn));
}
// Emits code that stores the 64-bit immediate |value| into the
// GUEST_CALL_RET_ADDR stack slot, which Call/CallIndirect later load into
// rdx for non-tail calls.
void X64Emitter::SetReturnAddress(uint64_t value) {
  // x86-64 has no "mov m64, imm64" form: a single store to memory can only
  // carry a sign-extended 32-bit immediate. Go through MovMem64, which splits
  // the store when |value| does not fit, so that values with bit 31 set or
  // with non-zero high bits are written exactly.
  MovMem64(rsp + StackLayout::GUEST_CALL_RET_ADDR, value);
}
// Emits a reload of the full 64-bit rcx (despite the "ECX" in the name) from
// the GUEST_RCX_HOME stack slot.
void X64Emitter::ReloadECX() {
mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
}
// Emits a reload of the full 64-bit rdx (despite the "EDX" in the name) from
// offset 8 of the structure rcx points at. rcx must therefore be valid
// first; callers in this file invoke ReloadECX immediately before this.
void X64Emitter::ReloadEDX() {
mov(rdx, qword[rcx + 8]); // membase
}
// When STORE_EFLAGS is enabled, emits code that restores the flags register
// from the value saved at [rsp + STASH_OFFSET]. Clobbers rax. Emits nothing
// when STORE_EFLAGS is disabled.
void X64Emitter::LoadEflags() {
#if STORE_EFLAGS
// Load the saved 32-bit flags value, then move it into the flags register
// via the stack.
mov(eax, dword[rsp + STASH_OFFSET]);
push(rax);
popf();
#else
// EFLAGS already present.
#endif // STORE_EFLAGS
}
// When STORE_EFLAGS is enabled, emits code that saves the flags register to
// [rsp + STASH_OFFSET] so LoadEflags can restore it later. Emits nothing
// when STORE_EFLAGS is disabled.
void X64Emitter::StoreEflags() {
#if STORE_EFLAGS
pushf();
// POP with an rsp-based memory destination computes the address after rsp
// has been incremented, so this writes the slot that LoadEflags reads.
pop(qword[rsp + STASH_OFFSET]);
#else
// EFLAGS should have CA set?
// (so long as nothing else modifies it)
#endif // STORE_EFLAGS
}
// Returns true when |v| can be represented as a sign-extended 32-bit
// immediate: bits 31..63 are either all clear (non-negative value below
// 2^31) or all set (negative value that fits in 32 bits).
bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
  const uint64_t upper_bits = v & ~0x7FFFFFFF;
  const bool is_small_positive = upper_bits == 0;
  const bool is_small_negative = upper_bits == ~0x7FFFFFFF;
  return is_small_positive || is_small_negative;
}
void X64Emitter::MovMem64(const RegExp& addr, uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
mov(qword[addr], v);
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32bits.
mov(qword[addr], v);
} else if (!(v >> 32)) {
// All high bits are zero. It'd be nice if we had a way to load a 32bit
// immediate without sign extending!
// TODO(benvanik): this is super common, find a better way.
mov(dword[addr], static_cast<uint32_t>(v));
mov(dword[addr + 4], 0);
} else {
// 64bit number that needs double movs.
mov(dword[addr], static_cast<uint32_t>(v));
mov(dword[addr + 4], static_cast<uint32_t>(v >> 32));
}
}
// Emits a load of the address of the 128-bit constant selected by |id| into
// rax and returns the memory operand [rax] for use by a following
// instruction. Clobbers rax.
Address X64Emitter::GetXmmConstPtr(XmmConst id) {
// Indexed directly by |id|; each entry is labeled with the XmmConst name it
// is meant to correspond to, so the order here must stay in sync with that
// enum.
static const vec128_t xmm_consts[] = {
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
/* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f),
/* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
/* XMMMaskX16Y16 */ vec128i(0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000),
/* XMMFlipX16Y16 */ vec128i(0x00008000, 0x00000000, 0x00000000, 0x00000000),
/* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
/* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
/* XMMOneMask */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
};
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
// prevent this move.
// TODO(benvanik): move to predictable location in PPCContext? could then
// just do rcx relative addressing with no rax overwriting.
mov(rax, (uint64_t)&xmm_consts[id]);
return ptr[rax];
}
// Emits code that loads the 128-bit constant |v| into |dest|. All-zero and
// all-one values get dedicated short sequences; anything else is written to
// the stack stash slot and loaded from there.
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
  // http://www.agner.org/optimize/optimizing_assembly.pdf
  // 13.4 Generating constants
  const bool all_bits_clear = !v.low && !v.high;
  const bool all_bits_set = v.low == ~0ull && v.high == ~0ull;

  if (all_bits_clear) {
    // 0000...
    vpxor(dest, dest);
    return;
  }
  if (all_bits_set) {
    // 1111... (note: GetXmmConstPtr overwrites rax)
    vmovaps(dest, GetXmmConstPtr(XMMOneMask));
    return;
  }

  // TODO(benvanik): see what other common values are.
  // TODO(benvanik): build constant table - 99% are reused.
  // NOTE(review): vmovdqa is an aligned load; this presumably relies on
  // rsp + STASH_OFFSET being 16-byte aligned - confirm.
  MovMem64(rsp + STASH_OFFSET, v.low);
  MovMem64(rsp + STASH_OFFSET + 8, v.high);
  vmovdqa(dest, ptr[rsp + STASH_OFFSET]);
}
// Emits code that loads the 32-bit float constant |v| into |dest|.
// Special cases are selected on the exact bit pattern of |v|, not its
// numeric value.
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) {
  union {
    float f;
    uint32_t i;
  } x = { v };
  if (!x.i) {
    // +0.0f only. Testing the bits rather than the value matters: -0.0f
    // compares equal to zero but has its sign bit set, and must not be
    // replaced by an all-zero register.
    vpxor(dest, dest);
  } else if (x.i == ~0u) {
    // 1111... Compare against a 32-bit all-ones value; an unsigned long
    // literal is 64 bits wide on LP64 targets and could never match.
    vmovaps(dest, GetXmmConstPtr(XMMOneMask));
  } else {
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(eax, x.i);
    vmovd(dest, eax);
  }
}
// Emits code that loads the 64-bit double constant |v| into |dest|.
// Special cases are selected on the exact bit pattern of |v|, not its
// numeric value.
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) {
  union {
    double d;
    uint64_t i;
  } x = { v };
  if (!x.i) {
    // +0.0 only. Testing the bits rather than the value matters: -0.0
    // compares equal to zero but has its sign bit set, and must not be
    // replaced by an all-zero register.
    vpxor(dest, dest);
  } else if (x.i == ~0ULL) {
    // 1111...
    vmovaps(dest, GetXmmConstPtr(XMMOneMask));
  } else {
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(rax, x.i);
    vmovq(dest, rax);
  }
}
// Emits an unaligned store of |r| to the stack stash slot and returns the
// memory operand for that slot.
Address X64Emitter::StashXmm(const Xmm& r) {
  const Address stash_slot = ptr[rsp + STASH_OFFSET];
  vmovups(stash_slot, r);
  return stash_slot;
}
// Emits code that materializes the constant |v| in xmm0 (overwriting it),
// stores it to the stack stash slot with an unaligned store, and returns the
// memory operand for that slot.
Address X64Emitter::StashXmm(const vec128_t& v) {
  LoadConstantXmm(xmm0, v);
  const Address stash_slot = ptr[rsp + STASH_OFFSET];
  vmovups(stash_slot, xmm0);
  return stash_slot;
}

View File

@ -19,7 +19,9 @@
XEDECLARECLASS2(alloy, hir, HIRBuilder); XEDECLARECLASS2(alloy, hir, HIRBuilder);
XEDECLARECLASS2(alloy, hir, Instr); XEDECLARECLASS2(alloy, hir, Instr);
XEDECLARECLASS2(alloy, runtime, DebugInfo); XEDECLARECLASS2(alloy, runtime, DebugInfo);
XEDECLARECLASS2(alloy, runtime, FunctionInfo);
XEDECLARECLASS2(alloy, runtime, Runtime); XEDECLARECLASS2(alloy, runtime, Runtime);
XEDECLARECLASS2(alloy, runtime, SymbolInfo);
namespace alloy { namespace alloy {
namespace backend { namespace backend {
@ -33,6 +35,25 @@ enum RegisterFlags {
REG_ABCD = (1 << 1), REG_ABCD = (1 << 1),
}; };
// Identifiers for the 128-bit constants available through
// X64Emitter::GetXmmConstPtr. The numeric values are explicit and
// consecutive from 0.
// NOTE(review): these presumably serve as indices into the constant table
// inside GetXmmConstPtr, so that table would need to stay in the same
// order - confirm against x64_emitter.cc.
enum XmmConst {
XMMZero = 0,
XMMOne = 1,
XMMNegativeOne = 2,
XMMMaskX16Y16 = 3,
XMMFlipX16Y16 = 4,
XMMFixX16Y16 = 5,
XMMNormalizeX16Y16 = 6,
XMM3301 = 7,
XMMSignMaskPS = 8,
XMMSignMaskPD = 9,
XMMByteSwapMask = 10,
XMMPermuteControl15 = 11,
XMMUnpackD3DCOLOR = 12,
XMMOneOver255 = 13,
XMMShiftMaskPS = 14,
XMMOneMask = 15,
};
// Unfortunately due to the design of xbyak we have to pass this to the ctor. // Unfortunately due to the design of xbyak we have to pass this to the ctor.
class XbyakAllocator : public Xbyak::Allocator { class XbyakAllocator : public Xbyak::Allocator {
public: public:
@ -54,79 +75,68 @@ public:
void*& out_code_address, size_t& out_code_size); void*& out_code_address, size_t& out_code_size);
public: public:
template<typename V0>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
SetupReg(v0, r0);
}
template<typename V0, typename V1>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags) {
SetupReg(v0, r0);
SetupReg(v1, r1);
}
template<typename V0, typename V1, typename V2>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags,
hir::Value* v2, V2& r2, uint32_t r2_flags) {
SetupReg(v0, r0);
SetupReg(v1, r1);
SetupReg(v2, r2);
}
template<typename V0, typename V1, typename V2, typename V3>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags,
hir::Value* v2, V2& r2, uint32_t r2_flags,
hir::Value* v3, V3& r3, uint32_t r3_flags) {
SetupReg(v0, r0);
SetupReg(v1, r1);
SetupReg(v2, r2);
SetupReg(v3, r3);
}
template<typename V0>
void EndOp(V0& r0) {
}
template<typename V0, typename V1>
void EndOp(V0& r0, V1& r1) {
}
template<typename V0, typename V1, typename V2>
void EndOp(V0& r0, V1& r1, V2& r2) {
}
template<typename V0, typename V1, typename V2, typename V3>
void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
}
// Reserved: rsp // Reserved: rsp
// Scratch: rax/rcx/rdx // Scratch: rax/rcx/rdx
// xmm0-1 // xmm0-2 (could be only xmm0 with some trickery)
// Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?) // Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?)
// xmm6-xmm15 (save to get xmm2-xmm5) // xmm6-xmm15 (save to get xmm3-xmm5)
static const int GPR_COUNT = 5; static const int GPR_COUNT = 5;
static const int XMM_COUNT = 10; static const int XMM_COUNT = 10;
static void SetupReg(hir::Value* v, Xbyak::Reg8& r) { static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) {
auto idx = gpr_reg_map_[v->reg.index]; auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg8(idx); r = Xbyak::Reg8(idx);
} }
static void SetupReg(hir::Value* v, Xbyak::Reg16& r) { static void SetupReg(const hir::Value* v, Xbyak::Reg16& r) {
auto idx = gpr_reg_map_[v->reg.index]; auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg16(idx); r = Xbyak::Reg16(idx);
} }
static void SetupReg(hir::Value* v, Xbyak::Reg32& r) { static void SetupReg(const hir::Value* v, Xbyak::Reg32& r) {
auto idx = gpr_reg_map_[v->reg.index]; auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg32(idx); r = Xbyak::Reg32(idx);
} }
static void SetupReg(hir::Value* v, Xbyak::Reg64& r) { static void SetupReg(const hir::Value* v, Xbyak::Reg64& r) {
auto idx = gpr_reg_map_[v->reg.index]; auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg64(idx); r = Xbyak::Reg64(idx);
} }
static void SetupReg(hir::Value* v, Xbyak::Xmm& r) { static void SetupReg(const hir::Value* v, Xbyak::Xmm& r) {
auto idx = xmm_reg_map_[v->reg.index]; auto idx = xmm_reg_map_[v->reg.index];
r = Xbyak::Xmm(idx); r = Xbyak::Xmm(idx);
} }
hir::Instr* Advance(hir::Instr* i); void MarkSourceOffset(const hir::Instr* i);
void MarkSourceOffset(hir::Instr* i); void DebugBreak();
void Trap();
void UnimplementedInstr(const hir::Instr* i);
void UnimplementedExtern(const hir::Instr* i);
void Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info);
void CallIndirect(const hir::Instr* instr, const Xbyak::Reg64& reg);
void CallExtern(const hir::Instr* instr, const runtime::FunctionInfo* symbol_info);
void CallNative(void* fn);
void CallNative(uint64_t(*fn)(void* raw_context));
void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0));
void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0);
void SetReturnAddress(uint64_t value);
void ReloadECX();
void ReloadEDX();
// TODO(benvanik): Label for epilog (don't use strings).
void LoadEflags();
void StoreEflags();
// Moves a 64bit immediate into memory.
bool ConstantFitsIn32Reg(uint64_t v);
void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
Xbyak::Address GetXmmConstPtr(XmmConst id);
void LoadConstantXmm(Xbyak::Xmm dest, float v);
void LoadConstantXmm(Xbyak::Xmm dest, double v);
void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
Xbyak::Address StashXmm(const Xbyak::Xmm& r);
Xbyak::Address StashXmm(const vec128_t& v);
size_t stack_size() const { return stack_size_; } size_t stack_size() const { return stack_size_; }

View File

@ -0,0 +1,714 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
namespace {
// Operand kinds that can appear in one field of an InstrKey.
// The non-value kinds mirror the OPCODE_SIG_TYPE_* signature constants. Value
// operands are further split by HIR type (OPCODE_SIG_TYPE_V + type) so that
// value operands of different types produce different keys.
enum KeyType {
// Kind used by VoidOp and as the default for unused InstrKey::Construct slots.
KEY_TYPE_X = OPCODE_SIG_TYPE_X,
// Kind used by LabelOp.
KEY_TYPE_L = OPCODE_SIG_TYPE_L,
// Kind used by OffsetOp.
KEY_TYPE_O = OPCODE_SIG_TYPE_O,
// Kind used by SymbolOp.
KEY_TYPE_S = OPCODE_SIG_TYPE_S,
// Value operands, one kind per HIR value type.
KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
};
#pragma pack(push, 1)
// 32-bit key describing an instruction's opcode and the kind of each operand.
// Bit layout, low to high: opcode:8, dest:5, src1:5, src2:5, src3:5,
// reserved:4. Construct<> below builds the same value at compile time using
// matching shifts (8/13/18/23), so the two must be kept in sync.
union InstrKey {
  struct {
    uint32_t opcode : 8;
    uint32_t dest : 5;
    uint32_t src1 : 5;
    uint32_t src2 : 5;
    uint32_t src3 : 5;
    uint32_t reserved : 4;
  };
  uint32_t value;

  InstrKey() : value(0) {}
  InstrKey(uint32_t v) : value(v) {}

  // Builds the key for a concrete instruction. Operand slots whose signature
  // kind is OPCODE_SIG_TYPE_V are refined with the type of the actual value.
  InstrKey(const Instr* i) : value(0) {
    const uint32_t signature = i->opcode->signature;
    opcode = i->opcode->num;

    if (GET_OPCODE_SIG_TYPE_DEST(signature)) {
      dest = OPCODE_SIG_TYPE_V + i->dest->type;
    } else {
      dest = 0;
    }

    src1 = GET_OPCODE_SIG_TYPE_SRC1(signature);
    if (src1 == OPCODE_SIG_TYPE_V) {
      src1 = OPCODE_SIG_TYPE_V + i->src1.value->type;
    }

    src2 = GET_OPCODE_SIG_TYPE_SRC2(signature);
    if (src2 == OPCODE_SIG_TYPE_V) {
      src2 = OPCODE_SIG_TYPE_V + i->src2.value->type;
    }

    src3 = GET_OPCODE_SIG_TYPE_SRC3(signature);
    if (src3 == OPCODE_SIG_TYPE_V) {
      src3 = OPCODE_SIG_TYPE_V + i->src3.value->type;
    }
  }

  operator uint32_t() const {
    return value;
  }

  // Compile-time equivalent of the Instr constructor: packs an opcode and up
  // to four operand kinds into a key value.
  template <Opcode OPCODE,
            KeyType DEST = KEY_TYPE_X,
            KeyType SRC1 = KEY_TYPE_X,
            KeyType SRC2 = KEY_TYPE_X,
            KeyType SRC3 = KEY_TYPE_X>
  struct Construct {
    static const uint32_t value =
        (OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
  };
};
#pragma pack(pop)
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
// Aggregates every type in the pack into a single struct by inheriting from
// the first type and, recursively, from the CombinedStruct of the rest.
template <typename... Ts>
struct CombinedStruct;
// Recursion terminator: an empty pack yields an empty struct.
template <>
struct CombinedStruct<> {};
template <typename T, typename... Ts>
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};
// Common empty base of all operand descriptors.
struct OpBase {};
// Operand descriptor base. T is the concrete descriptor type and KEY_TYPE is
// the operand kind it matches; key_type is what I<> feeds into
// InstrKey::Construct to build the match key.
template <typename T, KeyType KEY_TYPE>
struct Op : OpBase {
static const KeyType key_type = KEY_TYPE;
};
// Placeholder descriptor for an operand slot of kind KEY_TYPE_X.
// It carries no data, so loading it does nothing.
struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;

  void Load(const Instr::Op&) {
    // Intentionally empty.
  }
};
// Descriptor for an operand of kind KEY_TYPE_O; exposes the instruction
// operand's `offset` field as `value`.
struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
  uint64_t value;

protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;

  // Copies the offset out of the instruction operand.
  void Load(const Instr::Op& source) {
    value = source.offset;
  }
};
// Descriptor for an operand of kind KEY_TYPE_S; exposes the instruction
// operand's `symbol_info` pointer as `value`.
struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
  FunctionInfo* value;

protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;

  // Copies the symbol pointer out of the instruction operand.
  // Always reports success.
  bool Load(const Instr::Op& source) {
    value = source.symbol_info;
    return true;
  }
};
// Descriptor for an operand of kind KEY_TYPE_L; exposes the instruction
// operand's `label` pointer as `value`.
struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
  hir::Label* value;

protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;

  // Copies the label pointer out of the instruction operand.
  void Load(const Instr::Op& source) {
    value = source.label;
  }
};
// CRTP base for descriptors of value operands.
//   T          - the concrete descriptor (must derive from this ValueOp and
//                provide constant()).
//   KEY_TYPE   - operand kind used in the instruction match key.
//   REG_TYPE   - Xbyak register class that holds the value when it is not a
//                constant.
//   CONST_TYPE - C++ type of the constant (not used by this base itself).
//   TAG        - tag index consulted by TagTable::CheckTag; -1 disables tag
//                matching.
// After Load() the operand is either a constant (is_constant == true, read it
// with T::constant()) or lives in a register (read it with reg() or through
// the implicit conversion to REG_TYPE).
template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE, int TAG = -1>
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE, TAG>, KEY_TYPE> {
  typedef REG_TYPE reg_type;
  static const int tag = TAG;

  const Value* value;
  bool is_constant;

  // Start in a defined state so that reg()/IsEqual() never read indeterminate
  // members on a descriptor that has not been loaded yet.
  ValueOp() : value(nullptr), is_constant(false) {}

  // Whether the constant can be used as a 32-bit immediate. The base answer
  // is always true; descriptors with wider constants override it.
  virtual bool ConstantFitsIn32Reg() const { return true; }

  // Register holding the value. Only valid for non-constant operands.
  const REG_TYPE& reg() const {
    XEASSERT(!is_constant);
    return reg_;
  }
  operator const REG_TYPE&() const {
    return reg();
  }

  // Two operands are equal when both are constants with the same value or
  // both are registers with the same index. A constant never equals a
  // register.
  bool IsEqual(const T& b) const {
    if (is_constant != b.is_constant) {
      return false;
    }
    if (is_constant) {
      // CRTP downcast: T derives from this ValueOp instantiation.
      return static_cast<const T*>(this)->constant() == b.constant();
    }
    return reg_.getIdx() == b.reg_.getIdx();
  }

  // A constant operand never equals a register; otherwise compare indices.
  bool IsEqual(const Xbyak::Reg& b) const {
    return !is_constant && reg_.getIdx() == b.getIdx();
  }

  bool operator== (const T& b) const {
    return IsEqual(b);
  }
  bool operator!= (const T& b) const {
    return !IsEqual(b);
  }
  bool operator== (const Xbyak::Reg& b) const {
    return IsEqual(b);
  }
  bool operator!= (const Xbyak::Reg& b) const {
    return !IsEqual(b);
  }

  // Binds this descriptor to an instruction operand. For non-constant values
  // the assigned register is resolved through X64Emitter::SetupReg.
  void Load(const Instr::Op& op) {
    value = op.value;
    is_constant = value->IsConstant();
    if (!is_constant) {
      X64Emitter::SetupReg(value, reg_);
    }
  }

protected:
  REG_TYPE reg_;
};
// 8-bit integer value operand.
template <int TAG = -1>
struct I8 : ValueOp<I8<TAG>, KEY_TYPE_V_I8, Reg8, int8_t, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  int8_t constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.i8;
  }
};
// 16-bit integer value operand.
template <int TAG = -1>
struct I16 : ValueOp<I16<TAG>, KEY_TYPE_V_I16, Reg16, int16_t, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  int16_t constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.i16;
  }
};
// 32-bit integer value operand.
template <int TAG = -1>
struct I32 : ValueOp<I32<TAG>, KEY_TYPE_V_I32, Reg32, int32_t, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  int32_t constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.i32;
  }
};
// 64-bit integer value operand.
template <int TAG = -1>
struct I64 : ValueOp<I64<TAG>, KEY_TYPE_V_I64, Reg64, int64_t, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  int64_t constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.i64;
  }

  // True when the 64-bit constant survives a round trip through a
  // sign-extended 32-bit immediate, i.e. bits 31..63 are all zero or all one.
  bool ConstantFitsIn32Reg() const override {
    const int64_t v = this->value->constant.i64;
    // ~0x7FFFFFFF is an int with only the sign bit set; widening it to 64
    // bits sign-extends, giving a mask of bits 31..63.
    const int64_t high_bits = v & ~0x7FFFFFFF;
    if (high_bits == 0) {
      // Fits under 31 bits, so just load using normal mov.
      return true;
    }
    if (high_bits == ~0x7FFFFFFF) {
      // Negative number that fits in 32bits.
      return true;
    }
    return false;
  }
};
// 32-bit float value operand, held in an Xmm register when not constant.
template <int TAG = -1>
struct F32 : ValueOp<F32<TAG>, KEY_TYPE_V_F32, Xmm, float, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  float constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.f32;
  }
};
// 64-bit float value operand, held in an Xmm register when not constant.
template <int TAG = -1>
struct F64 : ValueOp<F64<TAG>, KEY_TYPE_V_F64, Xmm, double, TAG> {
  // Constant payload; only valid when the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  double constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.f64;
  }
};
// 128-bit vector value operand, held in an Xmm register when not constant.
template <int TAG = -1>
struct V128 : ValueOp<V128<TAG>, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> {
  // Constant payload (returned by reference into the Value); only valid when
  // the operand is a constant.
  // Members of the dependent base are reached through this-> so that the
  // code also compiles with conforming two-phase name lookup.
  const vec128_t& constant() const {
    XEASSERT(this->is_constant);
    return this->value->constant.v128;
  }
};
// Tracks which hir::Value each tag has been bound to while one sequence is
// being matched. Value operands that share a tag (>= 0) must all refer to the
// same Value; operands with tag -1 and all non-value operands always match.
struct TagTable {
  enum { kTagCount = 16 };

  struct {
    bool valid;
    Instr::Op op;
  } table[kTagCount];

  // Every slot starts unbound. Without this the `valid` flags would be
  // indeterminate for a default-initialized table and CheckTag could compare
  // against garbage.
  TagTable() {
    for (int n = 0; n < kTagCount; ++n) {
      table[n].valid = false;
    }
  }

  // Operands of kind KEY_TYPE_X carry no value, so there is nothing to match.
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_X>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  // Label operands are never tag-matched.
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_L>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  // Offset operands are never tag-matched.
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_O>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  // Symbol operands are never tag-matched.
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_S>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  // Value operands: the first operand seen with a given tag binds the tag to
  // its Value; later operands with the same tag match only if they refer to
  // that same Value. Returns false on mismatch.
  template <typename T, typename std::enable_if<(T::key_type >= KEY_TYPE_V_I8)>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    static_assert(T::tag >= -1 && T::tag < kTagCount,
                  "Operand tag is outside of the tag table");
    const Value* value = op.value;
    if (T::tag == -1) {
      return true;
    }
    if (table[T::tag].valid &&
        table[T::tag].op.value != value) {
      return false;
    }
    table[T::tag].valid = true;
    // The table stores a non-const pointer but only uses it for comparisons.
    table[T::tag].op.value = const_cast<Value*>(value);
    return true;
  }
};
template <typename DEST, typename... Tf>
struct DestField;
template <typename DEST>
struct DestField<DEST> {
DEST dest;
protected:
bool LoadDest(const Instr* i, TagTable& tag_table) {
Instr::Op op;
op.value = i->dest;
if (tag_table.CheckTag<DEST>(op)) {
dest.Load(op);
return true;
}
return false;
}
};
template <>
struct DestField<VoidOp> {
protected:
bool LoadDest(const Instr* i, TagTable& tag_table) {
return true;
}
};
template <hir::Opcode OPCODE, typename... Ts>
struct I;
// Matcher for an instruction with a destination only.
// `key` is the compile-time InstrKey that a candidate instruction must equal.
template <hir::Opcode OPCODE, typename DEST>
struct I<OPCODE, DEST> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  const Instr* instr;

protected:
  template <typename... Ti> friend struct SequenceFields;

  // Attempts to bind this matcher to `i`. Succeeds when the instruction's key
  // equals `key` and the destination passes its tag check; on success the
  // operand descriptors are loaded and `instr` is set.
  bool Load(const Instr* i, TagTable& tag_table) {
    // LoadDest lives in a dependent base class, so it must be reached through
    // this-> to be found by conforming two-phase name lookup.
    if (InstrKey(i).value == key &&
        this->LoadDest(i, tag_table)) {
      instr = i;
      return true;
    }
    return false;
  }
};
// Matcher for an instruction with a destination and one source operand.
// `key` is the compile-time InstrKey that a candidate instruction must equal.
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  const Instr* instr;
  SRC1 src1;

protected:
  template <typename... Ti> friend struct SequenceFields;

  // Attempts to bind this matcher to `i`. Succeeds when the instruction's key
  // equals `key` and every operand passes its tag check; on success the
  // operand descriptors are loaded and `instr` is set.
  bool Load(const Instr* i, TagTable& tag_table) {
    // LoadDest lives in a dependent base class, so it must be reached through
    // this-> to be found by conforming two-phase name lookup.
    if (InstrKey(i).value == key &&
        this->LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1)) {
      instr = i;
      src1.Load(i->src1);
      return true;
    }
    return false;
  }
};
// Matcher for an instruction with a destination and two source operands.
// `key` is the compile-time InstrKey that a candidate instruction must equal.
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  static const KeyType src2_type = SRC2::key_type;
  const Instr* instr;
  SRC1 src1;
  SRC2 src2;

protected:
  template <typename... Ti> friend struct SequenceFields;

  // Attempts to bind this matcher to `i`. Succeeds when the instruction's key
  // equals `key` and every operand passes its tag check; on success the
  // operand descriptors are loaded and `instr` is set.
  bool Load(const Instr* i, TagTable& tag_table) {
    // LoadDest lives in a dependent base class, so it must be reached through
    // this-> to be found by conforming two-phase name lookup.
    if (InstrKey(i).value == key &&
        this->LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1) &&
        tag_table.CheckTag<SRC2>(i->src2)) {
      instr = i;
      src1.Load(i->src1);
      src2.Load(i->src2);
      return true;
    }
    return false;
  }
};
// Matcher for an instruction with a destination and three source operands.
// `key` is the compile-time InstrKey that a candidate instruction must equal.
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2, typename SRC3>
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type, SRC3::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  static const KeyType src2_type = SRC2::key_type;
  static const KeyType src3_type = SRC3::key_type;
  const Instr* instr;
  SRC1 src1;
  SRC2 src2;
  SRC3 src3;

protected:
  template <typename... Ti> friend struct SequenceFields;

  // Attempts to bind this matcher to `i`. Succeeds when the instruction's key
  // equals `key` and every operand passes its tag check; on success the
  // operand descriptors are loaded and `instr` is set.
  bool Load(const Instr* i, TagTable& tag_table) {
    // LoadDest lives in a dependent base class, so it must be reached through
    // this-> to be found by conforming two-phase name lookup.
    if (InstrKey(i).value == key &&
        this->LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1) &&
        tag_table.CheckTag<SRC2>(i->src2) &&
        tag_table.CheckTag<SRC3>(i->src3)) {
      instr = i;
      src1.Load(i->src1);
      src2.Load(i->src2);
      src3.Load(i->src3);
      return true;
    }
    return false;
  }
};
template <typename... Ti>
struct SequenceFields;
template <typename I1>
struct SequenceFields<I1> {
I1 i1;
typedef typename I1 I1Type;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (i1.Load(i, tag_table)) {
*new_tail = i->next;
return true;
}
return false;
}
};
template <typename I1, typename I2>
struct SequenceFields<I1, I2> : SequenceFields<I1> {
I2 i2;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i2.Load(ni, tag_table)) {
*new_tail = ni;
return i;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3>
struct SequenceFields<I1, I2, I3> : SequenceFields<I1, I2> {
I3 i3;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i3.Load(ni, tag_table)) {
*new_tail = ni;
return i;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3, typename I4>
struct SequenceFields<I1, I2, I3, I4> : SequenceFields<I1, I2, I3> {
I4 i4;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2, I3>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i4.Load(ni, tag_table)) {
*new_tail = ni;
return i;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3, typename I4, typename I5>
struct SequenceFields<I1, I2, I3, I4, I5> : SequenceFields<I1, I2, I3, I4> {
I5 i5;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2, I3, I4>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i5.Load(ni, tag_table)) {
*new_tail = ni;
return i;
}
}
return false;
}
};
// Base for a pattern that matches the instruction sequence Ti... and emits
// code for it. SEQ is the concrete sequence type and must provide
//   static void Emit(X64Emitter&, const EmitArgs&).
template <typename SEQ, typename... Ti>
struct Sequence {
  // The loaded matchers (i1..iN) handed to SEQ::Emit.
  struct EmitArgs : SequenceFields<Ti...> {};

  // Tries to match the pattern starting at instruction `i`. When it matches,
  // emits code through SEQ::Emit and returns true, with *new_tail set as
  // described by SequenceFields::Check. Returns false, emitting nothing, when
  // the pattern does not match.
  static bool Select(X64Emitter& e, const Instr* i, const Instr** new_tail) {
    EmitArgs args;
    // Value-initialize so that every tag slot starts out unbound; a
    // default-initialized table would have indeterminate `valid` flags.
    TagTable tag_table = {};
    if (!args.Check(i, tag_table, new_tail)) {
      return false;
    }
    SEQ::Emit(e, args);
    return true;
  }
};
// Returns the temporary register of the requested width. The emit helpers in
// SingleSequence use it to hold a constant or a saved source operand.
// Only the integer register classes below are specialized.
// NOTE(review): presumably al/ax/eax/rax is never handed out by the register
// allocator, so clobbering it is safe - confirm against the emitter's
// register map.
template <typename T>
const T GetTempReg(X64Emitter& e);
// 8-bit temporary: al.
template <>
const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
return e.al;
}
// 16-bit temporary: ax.
template <>
const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
return e.ax;
}
// 32-bit temporary: eax.
template <>
const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
return e.eax;
}
// 64-bit temporary: rax.
template <>
const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
return e.rax;
}
// Sequence that matches exactly one instruction of shape T.
// SEQ is the concrete emitter and must provide
//   static void Emit(X64Emitter&, const EmitArgType&).
// The Emit*Op helpers implement the usual operand-shuffling for two-operand
// x64 instructions; callers supply the actual instruction as functors:
//   reg_reg_fn(e, dest, src)     - dest = dest OP src (register source)
//   reg_const_fn(e, dest, imm32) - dest = dest OP imm (32-bit immediate)
// Constants that do not fit in a 32-bit immediate are first moved into the
// temporary register returned by GetTempReg.
template <typename SEQ, typename T>
struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
  typedef T EmitArgType;
  // EmitArgs is declared in a dependent base class and therefore has to be
  // named explicitly to be visible under conforming two-phase name lookup.
  typedef typename Sequence<SingleSequence<SEQ, T>, T>::EmitArgs EmitArgs;
  static const uint32_t head_key = T::key;

  // Adapter called by Sequence::Select; forwards the single matched
  // instruction to the concrete emitter.
  static void Emit(X64Emitter& e, const EmitArgs& _) {
    SEQ::Emit(e, _.i1);
  }

  // dest = OP(src1). Moves src1 (constant or register) into dest when needed
  // and then applies reg_fn(e, dest) in place.
  template <typename REG_FN>
  static void EmitUnaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_FN& reg_fn) {
    if (i.src1.is_constant) {
      e.mov(i.dest, i.src1.constant());
      reg_fn(e, i.dest);
    } else {
      if (i.dest != i.src1) {
        e.mov(i.dest, i.src1);
      }
      reg_fn(e, i.dest);
    }
  }

  // dest = src1 OP src2 where the operands may be swapped freely.
  // At most one of the sources may be a constant.
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitCommutativeBinaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.dest == i.src2) {
        // dest already holds src2: apply the constant in place.
        if (i.src1.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
        } else {
          auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
          e.mov(temp, i.src1.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2);
      }
    } else if (i.src2.is_constant) {
      if (i.dest == i.src1) {
        // dest already holds src1: apply the constant in place.
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src2.constant());
        reg_reg_fn(e, i.dest, i.src1);
      }
    } else {
      if (i.dest == i.src1) {
        reg_reg_fn(e, i.dest, i.src2);
      } else if (i.dest == i.src2) {
        // Operands can be swapped, so no temporary is needed.
        reg_reg_fn(e, i.dest, i.src1);
      } else {
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, i.src2);
      }
    }
  }

  // dest = src1 OP src2 where the operand order is preserved: src1 is always
  // the left-hand side. When dest aliases src2, src2 is saved to the
  // temporary register before dest is overwritten with src1.
  // At most one of the sources may be a constant.
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitAssociativeBinaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.dest == i.src2) {
        // Save src2 before dest (the same register) is clobbered.
        auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2);
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, temp);
      } else {
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2);
      }
    } else if (i.src2.is_constant) {
      if (i.dest == i.src1) {
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src1);
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      }
    } else {
      if (i.dest == i.src1) {
        reg_reg_fn(e, i.dest, i.src2);
      } else if (i.dest == i.src2) {
        // Save src2 before dest (the same register) is clobbered.
        auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2);
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, temp);
      } else {
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, i.src2);
      }
    }
  }

  // Compares src1 with src2 where the operand order does not matter; dest is
  // not touched. Here the functors receive (e, lhs, rhs): the register
  // operand is always passed as lhs and a constant (immediate or temporary
  // register) as rhs. At most one of the sources may be a constant.
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitCommutativeCompareOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.src1.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
      } else {
        auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
        e.mov(temp, i.src1.constant());
        reg_reg_fn(e, i.src2, temp);
      }
    } else if (i.src2.is_constant) {
      if (i.src2.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
      } else {
        auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2.constant());
        reg_reg_fn(e, i.src1, temp);
      }
    } else {
      reg_reg_fn(e, i.src1, i.src2);
    }
  }

  // Compares src1 with src2 where the operand order matters. The functors
  // receive (e, dest, lhs, rhs, inverse). When src1 is a constant the
  // operands are passed swapped (src2 as lhs, the constant as rhs) and
  // `inverse` is true so the functor can compensate; otherwise `inverse` is
  // false. At most one of the sources may be a constant.
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitAssociativeCompareOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.src1.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()), true);
      } else {
        auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
        e.mov(temp, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2, temp, true);
      }
    } else if (i.src2.is_constant) {
      if (i.src2.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()), false);
      } else {
        auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2.constant());
        reg_reg_fn(e, i.dest, i.src1, temp, false);
      }
    } else {
      reg_reg_fn(e, i.dest, i.src1, i.src2, false);
    }
  }
};
// Tag values for the TAG template argument of the value operand types
// (I8, I16, ... V128).
// ANY (-1) is the default and disables tag matching for that operand.
// Value operands that use the same TAGn within one sequence only match when
// they refer to the same hir::Value (see TagTable::CheckTag).
static const int ANY = -1;
typedef int tag_t;
static const tag_t TAG0 = 0;
static const tag_t TAG1 = 1;
static const tag_t TAG2 = 2;
static const tag_t TAG3 = 3;
static const tag_t TAG4 = 4;
static const tag_t TAG5 = 5;
static const tag_t TAG6 = 6;
static const tag_t TAG7 = 7;
// Function pointer type with the same signature as Sequence<...>::Select.
typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**);

// Adds one sequence to sequence_table, keyed by T::head_key and mapping to
// T::Select.
template <typename T>
void Register() {
  sequence_table.insert({ T::head_key, T::Select });
}

// Adds two or more sequences to sequence_table, in the order they are listed.
template <typename T, typename Tn, typename... Ts>
void Register() {
  Register<T>();
  Register<Tn, Ts...>();
}
// Defines Register_<name>(), which registers every sequence type listed in
// the variadic arguments via Register<...>().
#define EMITTER_OPCODE_TABLE(name, ...) \
void Register_##name() { \
Register<__VA_ARGS__>(); \
}
// Wraps a matcher type that contains commas so that it can be passed as a
// single macro argument.
#define MATCH(...) __VA_ARGS__
// Opens the definition of a single-instruction emitter struct `name` for the
// instruction shape `match`; the struct body follows the macro.
#define EMITTER(name, match) struct name : SingleSequence<name, match>
// Opens the definition of a sequence struct `name` deriving from
// Sequence<name, match>; the struct body follows the macro.
#define SEQUENCE(name, match) struct name : Sequence<name, match>
} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -2,32 +2,32 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. * * Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ #ifndef ALLOY_BACKEND_X64_X64_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ #define ALLOY_BACKEND_X64_X64_SEQUENCES_H_
#include <alloy/core.h> #include <alloy/core.h>
#include <alloy/hir/instr.h>
XEDECLARECLASS2(alloy, hir, Instr);
namespace alloy { namespace alloy {
namespace backend { namespace backend {
namespace x64 { namespace x64 {
namespace lowering {
class LoweringTable; class X64Emitter;
void RegisterSequences(LoweringTable* table);
void RegisterSequences();
bool SelectSequence(X64Emitter& e, const hir::Instr* i, const hir::Instr** new_tail);
} // namespace lowering
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace alloy } // namespace alloy
#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ #endif // ALLOY_BACKEND_X64_X64_SEQUENCES_H_

View File

@ -7,7 +7,7 @@
****************************************************************************** ******************************************************************************
*/ */
#include <alloy/backend/x64/lowering/tracers.h> #include <alloy/backend/x64/x64_tracers.h>
#include <alloy/backend/x64/x64_emitter.h> #include <alloy/backend/x64/x64_emitter.h>
#include <alloy/runtime/runtime.h> #include <alloy/runtime/runtime.h>
@ -15,19 +15,14 @@
using namespace alloy; using namespace alloy;
using namespace alloy::backend::x64; using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
using namespace alloy::runtime; using namespace alloy::runtime;
namespace alloy { namespace alloy {
namespace backend { namespace backend {
namespace x64 { namespace x64 {
namespace lowering {
#define ITRACE 0
#define IFLUSH() #define DTRACE 0
#define IPRINT
#define DFLUSH()
#define DPRINT
#define TARGET_THREAD 1 #define TARGET_THREAD 1
@ -36,6 +31,16 @@ namespace lowering {
#define DFLUSH() fflush(stdout) #define DFLUSH() fflush(stdout)
#define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf #define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf
uint32_t GetTracingMode() {
uint32_t mode = 0;
#if ITRACE
mode |= TRACING_INSTR;
#endif // ITRACE
#if DTRACE
mode |= TRACING_DATA;
#endif // DTRACE
return mode;
}
void TraceString(void* raw_context, const char* str) { void TraceString(void* raw_context, const char* str) {
auto thread_state = *((ThreadState**)raw_context); auto thread_state = *((ThreadState**)raw_context);
@ -190,7 +195,6 @@ void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
} }
} // namespace lowering
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace alloy } // namespace alloy

View File

@ -7,8 +7,8 @@
****************************************************************************** ******************************************************************************
*/ */
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ #ifndef ALLOY_BACKEND_X64_X64_TRACERS_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ #define ALLOY_BACKEND_X64_X64_TRACERS_H_
#include <alloy/core.h> #include <alloy/core.h>
@ -33,7 +33,15 @@ namespace alloy {
namespace backend { namespace backend {
namespace x64 { namespace x64 {
class X64Emitter; class X64Emitter;
namespace lowering {
enum TracingMode {
TRACING_INSTR = (1 << 1),
TRACING_DATA = (1 << 2),
};
uint32_t GetTracingMode();
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }
void TraceString(void* raw_context, const char* str); void TraceString(void* raw_context, const char* str);
@ -69,10 +77,9 @@ void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value); void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value); void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);
} // namespace lowering
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace alloy } // namespace alloy
#endif // ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ #endif // ALLOY_BACKEND_X64_X64_TRACERS_H_

View File

@ -368,6 +368,13 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
i->Remove(); i->Remove();
} }
break; break;
case OPCODE_CNTLZ:
if (i->src1.value->IsConstant()) {
v->set_zero(v->type);
v->CountLeadingZeros(i->src1.value->constant);
i->Remove();
}
break;
// TODO(benvanik): INSERT/EXTRACT // TODO(benvanik): INSERT/EXTRACT
// TODO(benvanik): SPLAT/PERMUTE/SWIZZLE // TODO(benvanik): SPLAT/PERMUTE/SWIZZLE
case OPCODE_SPLAT: case OPCODE_SPLAT:

View File

@ -9,6 +9,8 @@
#include <alloy/compiler/passes/context_promotion_pass.h> #include <alloy/compiler/passes/context_promotion_pass.h>
#include <gflags/gflags.h>
#include <alloy/compiler/compiler.h> #include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h> #include <alloy/runtime/runtime.h>
@ -20,6 +22,10 @@ using namespace alloy::hir;
using namespace alloy::runtime; using namespace alloy::runtime;
DEFINE_bool(store_all_context_values, false,
"Don't strip dead context stores to aid in debugging.");
ContextPromotionPass::ContextPromotionPass() : ContextPromotionPass::ContextPromotionPass() :
context_values_size_(0), context_values_(0), context_values_size_(0), context_values_(0),
CompilerPass() { CompilerPass() {
@ -69,11 +75,13 @@ int ContextPromotionPass::Run(HIRBuilder* builder) {
} }
// Remove all dead stores. // Remove all dead stores.
if (!FLAGS_store_all_context_values) {
block = builder->first_block(); block = builder->first_block();
while (block) { while (block) {
RemoveDeadStoresBlock(block); RemoveDeadStoresBlock(block);
block = block->next; block = block->next;
} }
}
return 0; return 0;
} }

View File

@ -13,12 +13,6 @@
#include <alloy/compiler/compiler.h> #include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h> #include <alloy/runtime/runtime.h>
#pragma warning(push)
#pragma warning(disable : 4244)
#pragma warning(disable : 4267)
#include <llvm/ADT/BitVector.h>
#pragma warning(pop)
using namespace alloy; using namespace alloy;
using namespace alloy::backend; using namespace alloy::backend;
using namespace alloy::compiler; using namespace alloy::compiler;

View File

@ -36,8 +36,6 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() {
} }
int DataFlowAnalysisPass::Run(HIRBuilder* builder) { int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
auto arena = builder->arena();
// Linearize blocks so that we can detect cycles and propagate dependencies. // Linearize blocks so that we can detect cycles and propagate dependencies.
uint32_t block_count = LinearizeBlocks(builder); uint32_t block_count = LinearizeBlocks(builder);

View File

@ -9,6 +9,8 @@
#include <alloy/compiler/passes/register_allocation_pass.h> #include <alloy/compiler/passes/register_allocation_pass.h>
#include <algorithm>
using namespace alloy; using namespace alloy;
using namespace alloy::backend; using namespace alloy::backend;
using namespace alloy::compiler; using namespace alloy::compiler;
@ -16,180 +18,135 @@ using namespace alloy::compiler::passes;
using namespace alloy::hir; using namespace alloy::hir;
struct RegisterAllocationPass::Interval { #define ASSERT_NO_CYCLES 0
uint32_t start_ordinal;
uint32_t end_ordinal;
Value* value;
RegisterFreeUntilSet* free_until_set;
// TODO(benvanik): reduce to offsets in arena?
struct Interval* next;
struct Interval* prev;
void AddToList(Interval** list_head) {
auto list_next = *list_head;
this->next = list_next;
if (list_next) {
list_next->prev = this;
}
*list_head = this;
}
void InsertIntoList(Interval** list_head) {
auto it = *list_head;
while (it) {
if (it->start_ordinal > this->start_ordinal) {
// Went too far. Insert before this interval.
this->prev = it->prev;
this->next = it;
if (it->prev) {
it->prev->next = this;
} else {
*list_head = this;
}
it->prev = this;
return;
}
if (!it->next) {
// None found, add at tail.
it->next = this;
this->prev = it;
return;
}
it = it->next;
}
}
void RemoveFromList(Interval** list_head) {
if (this->next) {
this->next->prev = this->prev;
}
if (this->prev) {
this->prev->next = this->next;
} else {
*list_head = this->next;
}
this->next = this->prev = NULL;
}
};
struct RegisterAllocationPass::Intervals {
Interval* unhandled;
Interval* active;
Interval* handled;
};
RegisterAllocationPass::RegisterAllocationPass( RegisterAllocationPass::RegisterAllocationPass(
const MachineInfo* machine_info) : const MachineInfo* machine_info) :
machine_info_(machine_info), machine_info_(machine_info),
CompilerPass() { CompilerPass() {
// Initialize register sets. The values of these will be // Initialize register sets.
// cleared before use, so just the structure is required. // TODO(benvanik): rewrite in a way that makes sense - this is terrible.
auto mi_sets = machine_info->register_sets; auto mi_sets = machine_info->register_sets;
xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_)); xe_zero_struct(&usage_sets_, sizeof(usage_sets_));
uint32_t n = 0; uint32_t n = 0;
while (mi_sets[n].count) { while (mi_sets[n].count) {
auto& mi_set = mi_sets[n]; auto& mi_set = mi_sets[n];
auto free_until_set = new RegisterFreeUntilSet(); auto usage_set = new RegisterSetUsage();
free_until_sets_.all_sets[n] = free_until_set; usage_sets_.all_sets[n] = usage_set;
free_until_set->count = mi_set.count; usage_set->count = mi_set.count;
free_until_set->set = &mi_set; usage_set->set = &mi_set;
if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) { if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
free_until_sets_.int_set = free_until_set; usage_sets_.int_set = usage_set;
} }
if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) { if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
free_until_sets_.float_set = free_until_set; usage_sets_.float_set = usage_set;
} }
if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) { if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
free_until_sets_.vec_set = free_until_set; usage_sets_.vec_set = usage_set;
} }
n++; n++;
} }
} }
RegisterAllocationPass::~RegisterAllocationPass() { RegisterAllocationPass::~RegisterAllocationPass() {
for (size_t n = 0; n < XECOUNT(free_until_sets_.all_sets); n++) { for (size_t n = 0; n < XECOUNT(usage_sets_.all_sets); n++) {
if (!free_until_sets_.all_sets[n]) { if (!usage_sets_.all_sets[n]) {
break; break;
} }
delete free_until_sets_.all_sets[n]; delete usage_sets_.all_sets[n];
} }
} }
int RegisterAllocationPass::Run(HIRBuilder* builder) { int RegisterAllocationPass::Run(HIRBuilder* builder) {
// A (probably broken) implementation of a linear scan register allocator // Simple per-block allocator that operates on SSA form.
// that operates directly on SSA form: // Registers do not move across blocks, though this could be
// http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf // optimized with some intra-block analysis (dominators/etc).
// // Really, it'd just be nice to have someone who knew what they
// Requirements: // were doing lower SSA and do this right.
// - SSA form (single definition for variables)
// - block should be in linear order:
// - dominators *should* come before (a->b->c)
// - loop block sequences *should not* have intervening non-loop blocks
auto arena = scratch_arena();
// Renumber everything.
uint32_t block_ordinal = 0; uint32_t block_ordinal = 0;
uint32_t instr_ordinal = 0; uint32_t instr_ordinal = 0;
auto block = builder->first_block(); auto block = builder->first_block();
while (block) { while (block) {
// Sequential block ordinals. // Sequential block ordinals.
block->ordinal = block_ordinal++; block->ordinal = block_ordinal++;
// Reset all state.
PrepareBlockState();
// Renumber all instructions in the block. This is required so that
// we can sort the usage pointers below.
auto instr = block->instr_head; auto instr = block->instr_head;
while (instr) { while (instr) {
// Sequential global instruction ordinals. // Sequential global instruction ordinals.
instr->ordinal = instr_ordinal++; instr->ordinal = instr_ordinal++;
instr = instr->next; instr = instr->next;
} }
block = block->next;
}
// Compute all liveness ranges by walking forward through all instr = block->instr_head;
// blocks/instructions and checking the last use of each value. This lets
// us know the exact order in (block#,instr#) form, which is then used to
// setup the range.
// TODO(benvanik): ideally we would have a list of all values and not have
// to keep walking instructions over and over.
Interval* prev_interval = NULL;
Interval* head_interval = NULL;
block = builder->first_block();
while (block) {
auto instr = block->instr_head;
while (instr) { while (instr) {
// Compute last-use for the dest value.
// Since we know all values of importance must be defined, we can avoid
// having to check every value and just look at dest.
const OpcodeInfo* info = instr->opcode; const OpcodeInfo* info = instr->opcode;
if (GET_OPCODE_SIG_TYPE_DEST(info->signature) == OPCODE_SIG_TYPE_V) { uint32_t signature = info->signature;
auto v = instr->dest;
if (!v->last_use) { // Update the register use heaps.
ComputeLastUse(v); AdvanceUses(instr);
// Check sources for retirement. If any are unused after this instruction
// we can eagerly evict them to speed up register allocation.
// Since X64 (and other platforms) can often take advantage of dest==src1
// register mappings we track retired src1 so that we can attempt to
// reuse it.
// NOTE: these checks require that the usage list be sorted!
bool has_preferred_reg = false;
RegAssignment preferred_reg = { 0 };
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V &&
!instr->src1.value->IsConstant()) {
if (!instr->src1_use->next) {
// Pull off preferred register. We will try to reuse this for the
// dest.
has_preferred_reg = true;
preferred_reg = instr->src1.value->reg;
XEASSERTNOTNULL(preferred_reg.set);
}
} }
// Add interval. if (GET_OPCODE_SIG_TYPE_DEST(signature) == OPCODE_SIG_TYPE_V) {
auto interval = arena->Alloc<Interval>(); // Must not have been set already.
interval->start_ordinal = instr->ordinal; XEASSERTNULL(instr->dest->reg.set);
interval->end_ordinal = v->last_use ?
v->last_use->ordinal : v->def->ordinal;
interval->value = v;
interval->next = NULL;
interval->prev = prev_interval;
if (prev_interval) {
prev_interval->next = interval;
} else {
head_interval = interval;
}
prev_interval = interval;
// Grab register set to use. // Sort the usage list. We depend on this in future uses of this variable.
// We do this now so it's only once per interval, and it makes it easy SortUsageList(instr->dest);
// to only compare intervals that overlap their sets.
if (v->type <= INT64_TYPE) { // If we have a preferred register, use that.
interval->free_until_set = free_until_sets_.int_set; // This way we can help along the stupid X86 two opcode instructions.
} else if (v->type <= FLOAT64_TYPE) { bool allocated;
interval->free_until_set = free_until_sets_.float_set; if (has_preferred_reg) {
// Allocate with the given preferred register. If the register is in
// the wrong set it will not be reused.
allocated = TryAllocateRegister(instr->dest, preferred_reg);
} else { } else {
interval->free_until_set = free_until_sets_.vec_set; // Allocate a register. This will either reserve a free one or
// spill and reuse an active one.
allocated = TryAllocateRegister(instr->dest);
}
if (!allocated) {
// Failed to allocate register -- need to spill and try again.
// We spill only those registers we aren't using.
if (!SpillOneRegister(builder, instr->dest->type)) {
// Unable to spill anything - this shouldn't happen.
XELOGE("Unable to spill any registers");
XEASSERTALWAYS();
return 1;
}
// Demand allocation.
if (!TryAllocateRegister(instr->dest)) {
// Boned.
XELOGE("Register allocation failed");
XEASSERTALWAYS();
return 1;
}
} }
} }
@ -198,228 +155,266 @@ int RegisterAllocationPass::Run(HIRBuilder* builder) {
block = block->next; block = block->next;
} }
// Now have a sorted list of intervals, minus their ending ordinals.
Intervals intervals;
intervals.unhandled = head_interval;
intervals.active = intervals.handled = NULL;
while (intervals.unhandled) {
// Get next unhandled interval.
auto current = intervals.unhandled;
intervals.unhandled = intervals.unhandled->next;
current->RemoveFromList(&intervals.unhandled);
// Check for intervals in active that are handled or inactive.
auto it = intervals.active;
while (it) {
auto next = it->next;
if (it->end_ordinal <= current->start_ordinal) {
// Move from active to handled.
it->RemoveFromList(&intervals.active);
it->AddToList(&intervals.handled);
}
it = next;
}
// Find a register for current.
if (!TryAllocateFreeReg(current, intervals)) {
// Failed, spill.
AllocateBlockedReg(builder, current, intervals);
}
if (current->value->reg.index!= -1) {
// Add current to active.
current->AddToList(&intervals.active);
}
}
return 0; return 0;
} }
void RegisterAllocationPass::ComputeLastUse(Value* value) { void RegisterAllocationPass::DumpUsage(const char* name) {
// TODO(benvanik): compute during construction? #if 0
// Note that this list isn't sorted (unfortunately), so we have to scan fprintf(stdout, "\n%s:\n", name);
// them all. for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
uint32_t max_ordinal = 0; auto usage_set = usage_sets_.all_sets[i];
Value::Use* last_use = NULL; if (usage_set) {
auto use = value->use_head; fprintf(stdout, "set %s:\n", usage_set->set->name);
while (use) { fprintf(stdout, " avail: %s\n", usage_set->availability.to_string().c_str());
if (!last_use || use->instr->ordinal >= max_ordinal) { fprintf(stdout, " upcoming uses:\n");
last_use = use; for (auto it = usage_set->upcoming_uses.begin();
max_ordinal = use->instr->ordinal; it != usage_set->upcoming_uses.end(); ++it) {
fprintf(stdout, " v%d, used at %d\n",
it->value->ordinal,
it->use->instr->ordinal);
} }
use = use->next;
} }
value->last_use = last_use ? last_use->instr : NULL; }
fflush(stdout);
#endif
} }
bool RegisterAllocationPass::TryAllocateFreeReg(
Interval* current, Intervals& intervals) {
// Reset all registers in the set to unused.
auto free_until_set = current->free_until_set;
for (uint32_t n = 0; n < free_until_set->count; n++) {
free_until_set->pos[n] = -1;
}
// Mark all active registers as used. void RegisterAllocationPass::PrepareBlockState() {
// TODO(benvanik): keep some kind of bitvector so that this is instant? for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
auto it = intervals.active; auto usage_set = usage_sets_.all_sets[i];
while (it) { if (usage_set) {
if (it->free_until_set == free_until_set) { usage_set->availability.set();
free_until_set->pos[it->value->reg.index] = 0; usage_set->upcoming_uses.clear();
}
it = it->next;
}
uint32_t max_pos = 0;
for (uint32_t n = 0; n < free_until_set->count; n++) {
if (max_pos == -1) {
max_pos = n;
} else {
if (free_until_set->pos[n] > free_until_set->pos[max_pos]) {
max_pos = n;
} }
} }
} DumpUsage("PrepareBlockState");
if (!free_until_set->pos[max_pos]) {
// No register available without spilling.
return false;
}
if (current->end_ordinal < free_until_set->pos[max_pos]) {
// Register available for the whole interval.
current->value->reg.set = free_until_set->set;
current->value->reg.index = max_pos;
} else {
// Register available for the first part of the interval.
// Split the interval at where it hits the next one.
//current->value->reg = max_pos;
//SplitRange(current, free_until_set->pos[max_pos]);
// TODO(benvanik): actually split -- for now we just spill.
return false;
}
return true;
} }
void RegisterAllocationPass::AllocateBlockedReg( void RegisterAllocationPass::AdvanceUses(Instr* instr) {
HIRBuilder* builder, Interval* current, Intervals& intervals) { for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
auto free_until_set = current->free_until_set; auto usage_set = usage_sets_.all_sets[i];
if (!usage_set) {
// TODO(benvanik): smart heuristics.
// wimmer AllocateBlockedReg has some stuff for deciding whether to
// spill current or some other active interval - which we ignore.
// Pick a random interval. Maybe the first. Sure.
auto spill_interval = intervals.active;
Value* spill_value = NULL;
Instr* prev_use = NULL;
Instr* next_use = NULL;
while (spill_interval) {
if (spill_interval->free_until_set != free_until_set ||
spill_interval->start_ordinal == current->start_ordinal) {
// Only interested in ones of the same register set.
// We also ensure that ones at the same ordinal as us are ignored,
// which can happen with multiple local inserts/etc.
spill_interval = spill_interval->next;
continue;
}
spill_value = spill_interval->value;
// Find the uses right before/after current.
auto use = spill_value->use_head;
while (use) {
if (use->instr->ordinal != -1) {
if (use->instr->ordinal < current->start_ordinal) {
if (!prev_use || prev_use->ordinal < use->instr->ordinal) {
prev_use = use->instr;
}
} else if (use->instr->ordinal > current->start_ordinal) {
if (!next_use || next_use->ordinal > use->instr->ordinal) {
next_use = use->instr;
}
}
}
use = use->next;
}
if (!prev_use) {
prev_use = spill_value->def;
}
if (prev_use->next == next_use) {
// Uh, this interval is way too short.
spill_interval = spill_interval->next;
continue;
}
XEASSERT(prev_use->ordinal != -1);
XEASSERTNOTNULL(next_use);
break; break;
} }
XEASSERT(spill_interval->free_until_set == free_until_set); auto& upcoming_uses = usage_set->upcoming_uses;
for (auto it = upcoming_uses.begin(); it != upcoming_uses.end();) {
if (!it->use) {
// No uses at all - we can remove right away.
// This comes up from instructions where the dest is never used,
// like the ATOMIC ops.
MarkRegAvailable(it->value->reg);
it = upcoming_uses.erase(it);
continue;
}
if (it->use->instr != instr) {
// Not yet at this instruction.
++it;
continue;
}
// The use is from this instruction.
if (!it->use->next) {
// Last use of the value. We can retire it now.
MarkRegAvailable(it->value->reg);
it = upcoming_uses.erase(it);
} else {
// Used again. Push back the next use.
// Note that we may be used multiple times this instruction, so
// eat those.
auto next_use = it->use->next;
while (next_use->next && next_use->instr == instr) {
next_use = next_use->next;
}
// Remove the iterator.
auto value = it->value;
it = upcoming_uses.erase(it);
upcoming_uses.emplace_back(value, next_use);
}
}
}
DumpUsage("AdvanceUses");
}
// Find the real last use -- paired ops may require sequences to stay bool RegisterAllocationPass::IsRegInUse(const RegAssignment& reg) {
// intact. This is a bad design. RegisterSetUsage* usage_set;
auto prev_def_tail = prev_use; if (reg.set == usage_sets_.int_set->set) {
while (prev_def_tail && usage_set = usage_sets_.int_set;
prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV) { } else if (reg.set == usage_sets_.float_set->set) {
prev_def_tail = prev_def_tail->prev; usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
return !usage_set->availability.test(reg.index);
}
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegUsed(const RegAssignment& reg,
Value* value, Value::Use* use) {
auto usage_set = RegisterSetForValue(value);
usage_set->availability.set(reg.index, false);
usage_set->upcoming_uses.emplace_back(value, use);
DumpUsage("MarkRegUsed");
return usage_set;
}
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegAvailable(const hir::RegAssignment& reg) {
RegisterSetUsage* usage_set;
if (reg.set == usage_sets_.int_set->set) {
usage_set = usage_sets_.int_set;
} else if (reg.set == usage_sets_.float_set->set) {
usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
usage_set->availability.set(reg.index, true);
return usage_set;
}
bool RegisterAllocationPass::TryAllocateRegister(
Value* value, const RegAssignment& preferred_reg) {
// If the preferred register matches type and is available, use it.
auto usage_set = RegisterSetForValue(value);
if (usage_set->set == preferred_reg.set) {
// Check if available.
if (!IsRegInUse(preferred_reg)) {
// Mark as in-use and return. Best case.
MarkRegUsed(preferred_reg, value, value->use_head);
value->reg = preferred_reg;
return true;
}
} }
Value* new_value; // Otherwise, fallback to allocating like normal.
uint32_t end_ordinal; return TryAllocateRegister(value);
}
bool RegisterAllocationPass::TryAllocateRegister(Value* value) {
// Get the set this register is in.
RegisterSetUsage* usage_set = RegisterSetForValue(value);
// Find the first free register, if any.
// We have to ensure it's a valid one (in our count).
unsigned long first_unused = 0;
bool all_used = _BitScanForward(&first_unused, usage_set->availability.to_ulong()) == 0;
if (!all_used && first_unused < usage_set->count) {
// Available! Use it!.
value->reg.set = usage_set->set;
value->reg.index = first_unused;
MarkRegUsed(value->reg, value, value->use_head);
return true;
}
// None available! Spill required.
return false;
}
bool RegisterAllocationPass::SpillOneRegister(
HIRBuilder* builder, TypeName required_type) {
// Get the set that we will be picking from.
RegisterSetUsage* usage_set;
if (required_type <= INT64_TYPE) {
usage_set = usage_sets_.int_set;
} else if (required_type <= FLOAT64_TYPE) {
usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
DumpUsage("SpillOneRegister (pre)");
// Pick the one with the furthest next use.
XEASSERT(!usage_set->upcoming_uses.empty());
auto furthest_usage = std::max_element(
usage_set->upcoming_uses.begin(), usage_set->upcoming_uses.end(),
RegisterUsage::Comparer());
Value* spill_value = furthest_usage->value;
Value::Use* prev_use = furthest_usage->use->prev;
Value::Use* next_use = furthest_usage->use;
XEASSERTNOTNULL(next_use);
usage_set->upcoming_uses.erase(furthest_usage);
DumpUsage("SpillOneRegister (post)");
const auto reg = spill_value->reg;
// We know the spill_value use list is sorted, so we can cut it right now.
// This makes it easier down below.
auto new_head_use = next_use;
// Allocate local.
if (spill_value->local_slot) { if (spill_value->local_slot) {
// Value is already assigned a slot, so load from that. // Value is already assigned a slot. Since we allocate in order and this is
// We can then split the interval right after the previous use to // all SSA we know the stored value will be exactly what we want. Yay,
// before the next use. // we can prevent the redundant store!
// In fact, we may even want to pin this spilled value so that we always
// Update the last use of the spilled interval/value. // use the spilled value and prevent the need for more locals.
end_ordinal = spill_interval->end_ordinal;
spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal;
XEASSERT(end_ordinal != -1);
XEASSERT(spill_interval->end_ordinal != -1);
// Insert a load right before the next use.
new_value = builder->LoadLocal(spill_value->local_slot);
builder->last_instr()->MoveBefore(next_use);
// Update last use info.
new_value->last_use = spill_value->last_use;
spill_value->last_use = prev_use;
} else { } else {
// Allocate a local slot. // Allocate a local slot.
spill_value->local_slot = builder->AllocLocal(spill_value->type); spill_value->local_slot = builder->AllocLocal(spill_value->type);
// Insert a spill right after the def. // Add store.
builder->StoreLocal(spill_value->local_slot, spill_value); builder->StoreLocal(spill_value->local_slot, spill_value);
auto spill_store = builder->last_instr(); auto spill_store = builder->last_instr();
spill_store->MoveBefore(prev_def_tail->next); auto spill_store_use = spill_store->src2_use;
XEASSERTNULL(spill_store_use->prev);
if (prev_use && prev_use->instr->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
// Instruction is paired. This is bad. We will insert the spill after the
// paired instruction.
XEASSERTNOTNULL(prev_use->instr->next);
spill_store->MoveBefore(prev_use->instr->next);
// Update last use of spilled interval/value. // Update last use.
end_ordinal = spill_interval->end_ordinal; spill_value->last_use = spill_store;
spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal; } else if (prev_use) {
XEASSERT(end_ordinal != -1); // We insert the store immediately before the previous use.
XEASSERT(spill_interval->end_ordinal != -1); // If we were smarter we could then re-run allocation and reuse the register
// once dropped.
spill_store->MoveBefore(prev_use->instr);
// Insert a load right before the next use. // Update last use.
new_value = builder->LoadLocal(spill_value->local_slot); spill_value->last_use = prev_use->instr;
builder->last_instr()->MoveBefore(next_use); } else {
// This is the first use, so the only thing we have is the define.
// Move the store to right after that.
spill_store->MoveBefore(spill_value->def->next);
// Update last use info. // Update last use.
new_value->last_use = spill_value->last_use;
spill_value->last_use = spill_store; spill_value->last_use = spill_store;
} }
}
// Reuse the same local slot. Hooray SSA. #if ASSERT_NO_CYCLES
builder->AssertNoCycles();
spill_value->def->block->AssertNoCycles();
#endif // ASSERT_NO_CYCLES
// Add load.
// Inserted immediately before the next use. Since by definition the next
// use is after the instruction requesting the spill we know we haven't
// done allocation for that code yet and can let that be handled
// automatically when we get to it.
auto new_value = builder->LoadLocal(spill_value->local_slot);
auto spill_load = builder->last_instr();
spill_load->MoveBefore(next_use->instr);
// Note: implicit first use added.
#if ASSERT_NO_CYCLES
builder->AssertNoCycles();
spill_value->def->block->AssertNoCycles();
#endif // ASSERT_NO_CYCLES
// Set the local slot of the new value to our existing one. This way we will
// reuse that same memory if needed.
new_value->local_slot = spill_value->local_slot; new_value->local_slot = spill_value->local_slot;
// Rename all future uses to that loaded value. // Rename all future uses of the SSA value to the new value as loaded
auto use = spill_value->use_head; // from the local.
while (use) { // We can quickly do this by walking the use list. Because the list is
// TODO(benvanik): keep use list sorted so we don't have to do this. // already sorted we know we are going to end up with a sorted list.
if (use->instr->ordinal <= spill_interval->end_ordinal || auto walk_use = new_head_use;
use->instr->ordinal == -1) { auto new_use_tail = walk_use;
use = use->next; while (walk_use) {
continue; auto next_walk_use = walk_use->next;
} auto instr = walk_use->instr;
auto next = use->next;
auto instr = use->instr;
uint32_t signature = instr->opcode->signature; uint32_t signature = instr->opcode->signature;
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) { if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src1.value == spill_value) { if (instr->src1.value == spill_value) {
@ -436,36 +431,107 @@ void RegisterAllocationPass::AllocateBlockedReg(
instr->set_src3(new_value); instr->set_src3(new_value);
} }
} }
use = next;
walk_use = next_walk_use;
if (walk_use) {
new_use_tail = walk_use;
} }
// Create new interval.
auto arena = scratch_arena();
auto new_interval = arena->Alloc<Interval>();
new_interval->start_ordinal = new_value->def->ordinal;
new_interval->end_ordinal = end_ordinal;
new_interval->value = new_value;
new_interval->next = NULL;
new_interval->prev = NULL;
if (new_value->type <= INT64_TYPE) {
new_interval->free_until_set = free_until_sets_.int_set;
} else if (new_value->type <= FLOAT64_TYPE) {
new_interval->free_until_set = free_until_sets_.float_set;
} else {
new_interval->free_until_set = free_until_sets_.vec_set;
} }
new_value->last_use = new_use_tail->instr;
// Remove the old interval from the active list, as it's been spilled. // Update tracking.
spill_interval->RemoveFromList(&intervals.active); MarkRegAvailable(reg);
spill_interval->AddToList(&intervals.handled);
// Insert interval into the right place in the list. return true;
// We know it's ahead of us. }
new_interval->InsertIntoList(&intervals.unhandled);
RegisterAllocationPass::RegisterSetUsage*
// TODO(benvanik): use the register we just freed? RegisterAllocationPass::RegisterSetForValue(
//current->value->reg.set = free_until_set->set; const Value* value) {
//current->value->reg.index = spill_interval->value->reg.index; if (value->type <= INT64_TYPE) {
bool allocated = TryAllocateFreeReg(current, intervals); return usage_sets_.int_set;
XEASSERTTRUE(allocated); } else if (value->type <= FLOAT64_TYPE) {
return usage_sets_.float_set;
} else {
return usage_sets_.vec_set;
}
}
namespace {
int CompareValueUse(const Value::Use* a, const Value::Use* b) {
return a->instr->ordinal - b->instr->ordinal;
}
} // namespace
void RegisterAllocationPass::SortUsageList(Value* value) {
// Modified in-place linked list sort from:
// http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
if (!value->use_head) {
return;
}
Value::Use* head = value->use_head;
Value::Use* tail = nullptr;
int insize = 1;
while (true) {
auto p = head;
head = nullptr;
tail = nullptr;
// count number of merges we do in this pass
int nmerges = 0;
while (p) {
// there exists a merge to be done
nmerges++;
// step 'insize' places along from p
auto q = p;
int psize = 0;
for (int i = 0; i < insize; i++) {
psize++;
q = q->next;
if (!q) break;
}
// if q hasn't fallen off end, we have two lists to merge
int qsize = insize;
// now we have two lists; merge them
while (psize > 0 || (qsize > 0 && q)) {
// decide whether next element of merge comes from p or q
Value::Use* e = nullptr;
if (psize == 0) {
// p is empty; e must come from q
e = q; q = q->next; qsize--;
} else if (qsize == 0 || !q) {
// q is empty; e must come from p
e = p; p = p->next; psize--;
} else if (CompareValueUse(p, q) <= 0) {
// First element of p is lower (or same); e must come from p
e = p; p = p->next; psize--;
} else {
// First element of q is lower; e must come from q
e = q; q = q->next; qsize--;
}
// add the next element to the merged list
if (tail) {
tail->next = e;
} else {
head = e;
}
// Maintain reverse pointers in a doubly linked list.
e->prev = tail;
tail = e;
}
// now p has stepped 'insize' places along, and q has too
p = q;
}
if (tail) {
tail->next = nullptr;
}
// If we have done only one merge, we're finished
if (nmerges <= 1) {
// allow for nmerges==0, the empty list case
break;
}
// Otherwise repeat, merging lists twice the size
insize *= 2;
}
value->use_head = head;
value->last_use = tail->instr;
} }

View File

@ -10,6 +10,10 @@
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ #ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ #define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
#include <algorithm>
#include <bitset>
#include <vector>
#include <alloy/backend/machine_info.h> #include <alloy/backend/machine_info.h>
#include <alloy/compiler/compiler_pass.h> #include <alloy/compiler/compiler_pass.h>
@ -27,28 +31,53 @@ public:
virtual int Run(hir::HIRBuilder* builder); virtual int Run(hir::HIRBuilder* builder);
private: private:
struct Interval; // TODO(benvanik): rewrite all this set shit -- too much indirection, the
struct Intervals; // complexity is not needed.
void ComputeLastUse(hir::Value* value); struct RegisterUsage {
bool TryAllocateFreeReg(Interval* current, Intervals& intervals); hir::Value* value;
void AllocateBlockedReg(hir::HIRBuilder* builder, hir::Value::Use* use;
Interval* current, Intervals& intervals); RegisterUsage() : value(nullptr), use(nullptr) {}
RegisterUsage(hir::Value* value_, hir::Value::Use* use_)
: value(value_), use(use_) {}
struct Comparer : std::binary_function<RegisterUsage, RegisterUsage, bool> {
bool operator()(const RegisterUsage& a, const RegisterUsage& b) const {
return a.use->instr->ordinal < b.use->instr->ordinal;
}
};
};
struct RegisterSetUsage {
const backend::MachineInfo::RegisterSet* set = nullptr;
uint32_t count = 0;
std::bitset<32> availability = 0;
// TODO(benvanik): another data type.
std::vector<RegisterUsage> upcoming_uses;
};
void DumpUsage(const char* name);
void PrepareBlockState();
void AdvanceUses(hir::Instr* instr);
bool IsRegInUse(const hir::RegAssignment& reg);
RegisterSetUsage* MarkRegUsed(const hir::RegAssignment& reg,
hir::Value* value, hir::Value::Use* use);
RegisterSetUsage* MarkRegAvailable(const hir::RegAssignment& reg);
bool TryAllocateRegister(hir::Value* value,
const hir::RegAssignment& preferred_reg);
bool TryAllocateRegister(hir::Value* value);
bool SpillOneRegister(hir::HIRBuilder* builder, hir::TypeName required_type);
RegisterSetUsage* RegisterSetForValue(const hir::Value* value);
void SortUsageList(hir::Value* value);
private: private:
const backend::MachineInfo* machine_info_; const backend::MachineInfo* machine_info_;
struct {
struct RegisterFreeUntilSet { RegisterSetUsage* int_set = nullptr;
uint32_t count; RegisterSetUsage* float_set = nullptr;
uint32_t pos[32]; RegisterSetUsage* vec_set = nullptr;
const backend::MachineInfo::RegisterSet* set; RegisterSetUsage* all_sets[3];
}; } usage_sets_;
struct RegisterFreeUntilSets {
RegisterFreeUntilSet* int_set;
RegisterFreeUntilSet* float_set;
RegisterFreeUntilSet* vec_set;
RegisterFreeUntilSet* all_sets[3];
};
RegisterFreeUntilSets free_until_sets_;
}; };

View File

@ -88,12 +88,12 @@ int ValidationPass::ValidateInstruction(Block* block, Instr* instr) {
} }
int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) { int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) {
if (value->def) { //if (value->def) {
/*auto def = value->def; // auto def = value->def;
XEASSERT(def->block == block); // XEASSERT(def->block == block);
if (def->block != block) { // if (def->block != block) {
return 1; // return 1;
}*/ // }
} //}
return 0; return 0;
} }

View File

@ -44,6 +44,10 @@ typedef struct XECACHEALIGN vec128_s {
uint64_t high; uint64_t high;
}; };
}; };
bool operator== (const vec128_s& b) const {
return low == b.low && high == b.high;
}
} vec128_t; } vec128_t;
XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
vec128_t v; vec128_t v;

View File

@ -643,20 +643,20 @@ XEEMITTER(cmpli, 0x28000000, D )(PPCHIRBuilder& f, InstrData& i) {
XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) & (RB) // RA <- (RS) & (RB)
Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) & ¬(RB) // RA <- (RS) & ¬(RB)
Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB))); Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB)));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -665,8 +665,8 @@ XEEMITTER(andix, 0x70000000, D )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.And( Value* ra = f.And(
f.LoadGPR(i.D.RT), f.LoadGPR(i.D.RT),
f.LoadConstant((uint64_t)i.D.DS)); f.LoadConstant((uint64_t)i.D.DS));
f.UpdateCR(0, ra);
f.StoreGPR(i.D.RA, ra); f.StoreGPR(i.D.RA, ra);
f.UpdateCR(0, ra);
return 0; return 0;
} }
@ -675,8 +675,8 @@ XEEMITTER(andisx, 0x74000000, D )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.And( Value* ra = f.And(
f.LoadGPR(i.D.RT), f.LoadGPR(i.D.RT),
f.LoadConstant((uint64_t(i.D.DS) << 16))); f.LoadConstant((uint64_t(i.D.DS) << 16)));
f.UpdateCR(0, ra);
f.StoreGPR(i.D.RA, ra); f.StoreGPR(i.D.RA, ra);
f.UpdateCR(0, ra);
return 0; return 0;
} }
@ -688,10 +688,10 @@ XEEMITTER(cntlzdx, 0x7C000074, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- n // RA <- n
Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT)); Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT));
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.X.RA, v);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.X.RA, v);
return 0; return 0;
} }
@ -704,10 +704,10 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(PPCHIRBuilder& f, InstrData& i) {
Value* v = f.CountLeadingZeros( Value* v = f.CountLeadingZeros(
f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.X.RA, v);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.X.RA, v);
return 0; return 0;
} }
@ -715,10 +715,10 @@ XEEMITTER(eqvx, 0x7C000238, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) == (RB) // RA <- (RS) == (RB)
Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -728,10 +728,10 @@ XEEMITTER(extsbx, 0x7C000774, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:55] <- i56.s // RA[0:55] <- i56.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -741,10 +741,10 @@ XEEMITTER(extshx, 0x7C000734, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:47] <- 48.s // RA[0:47] <- 48.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -754,10 +754,10 @@ XEEMITTER(extswx, 0x7C0007B4, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:31] <- i32.s // RA[0:31] <- i32.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -767,10 +767,10 @@ XEEMITTER(nandx, 0x7C0003B8, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -780,10 +780,10 @@ XEEMITTER(norx, 0x7C0000F8, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -803,10 +803,10 @@ XEEMITTER(orx, 0x7C000378, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
} }
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -815,10 +815,10 @@ XEEMITTER(orcx, 0x7C000338, X )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.Or( Value* ra = f.Or(
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.Not(f.LoadGPR(i.X.RB))); f.Not(f.LoadGPR(i.X.RB)));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -849,10 +849,10 @@ XEEMITTER(xorx, 0x7C000278, X )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.Xor( Value* ra = f.Xor(
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -895,10 +895,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
if (m != 0xFFFFFFFFFFFFFFFF) { if (m != 0xFFFFFFFFFFFFFFFF) {
v = f.And(v, f.LoadConstant(m)); v = f.And(v, f.LoadConstant(m));
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else if (i.MD.idx == 1) { } else if (i.MD.idx == 1) {
// XEEMITTER(rldicrx, 0x78000004, MD ) // XEEMITTER(rldicrx, 0x78000004, MD )
@ -922,10 +922,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant(m)); v = f.And(v, f.LoadConstant(m));
} }
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else if (i.MD.idx == 2) { } else if (i.MD.idx == 2) {
// XEEMITTER(rldicx, 0x78000008, MD ) // XEEMITTER(rldicx, 0x78000008, MD )
@ -959,10 +959,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
f.And(v, f.LoadConstant(m)), f.And(v, f.LoadConstant(m)),
f.And(ra, f.LoadConstant(~m))); f.And(ra, f.LoadConstant(~m)));
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else { } else {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
@ -987,10 +987,10 @@ XEEMITTER(rlwimix, 0x50000000, M )(PPCHIRBuilder& f, InstrData& i) {
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m)))); v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m))));
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1014,10 +1014,10 @@ XEEMITTER(rlwinmx, 0x54000000, M )(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1036,10 +1036,10 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1146,7 +1146,7 @@ XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
// CA is set to 1 if the low-order 32 bits of (RS) contain a negative number // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
// and any 1-bits are shifted out of position 63; otherwise CA is set to 0. // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
// We already have ca set to indicate the pos 63 bit, now just and in sign. // We already have ca set to indicate the pos 63 bit, now just and in sign.
ca = f.And(ca, f.Shr(v, 63)); ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));
f.StoreCA(ca); f.StoreCA(ca);
f.StoreGPR(i.X.RA, v); f.StoreGPR(i.X.RA, v);
@ -1174,15 +1174,15 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
XEASSERT(sh); XEASSERT(sh);
uint64_t mask = XEMASK(64 - sh, 63); uint64_t mask = XEMASK(64 - sh, 63);
Value* ca = f.And( Value* ca = f.And(
f.Shr(v, 63), f.Truncate(f.Shr(v, 63), INT8_TYPE),
f.IsTrue(f.And(v, f.LoadConstant(mask)))); f.IsTrue(f.And(v, f.LoadConstant(mask))));
f.StoreCA(ca); f.StoreCA(ca);
v = f.Sha(v, sh); v = f.Sha(v, sh);
f.StoreGPR(i.XS.RA, v);
if (i.XS.Rc) { if (i.XS.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.XS.RA, v);
return 0; return 0;
} }
@ -1203,7 +1203,7 @@ XEEMITTER(srawx, 0x7C000630, X )(PPCHIRBuilder& f, InstrData& i) {
// is negative. // is negative.
Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh)); Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
Value* ca = f.And( Value* ca = f.And(
f.Shr(v, 31), f.Truncate(f.Shr(v, 31), INT8_TYPE),
f.IsTrue(f.And(v, mask))); f.IsTrue(f.And(v, mask)));
f.StoreCA(ca); f.StoreCA(ca);
v = f.Sha(v, sh), v = f.Sha(v, sh),
@ -1235,8 +1235,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCHIRBuilder& f, InstrData& i) {
// is negative. // is negative.
uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63); uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63);
ca = f.And( ca = f.And(
f.Shr(v, 31), f.Truncate(f.Shr(v, 31), INT8_TYPE),
f.ZeroExtend(f.IsTrue(f.And(v, f.LoadConstant(mask))), INT32_TYPE)); f.IsTrue(f.And(v, f.LoadConstant(mask))));
v = f.Sha(v, (int8_t)i.X.RB), v = f.Sha(v, (int8_t)i.X.RB),
v = f.SignExtend(v, INT64_TYPE); v = f.SignExtend(v, INT64_TYPE);

View File

@ -240,18 +240,18 @@ void PPCHIRBuilder::UpdateCR(
void PPCHIRBuilder::UpdateCR( void PPCHIRBuilder::UpdateCR(
uint32_t n, Value* lhs, Value* rhs, bool is_signed) { uint32_t n, Value* lhs, Value* rhs, bool is_signed) {
Value* lt;
Value* gt;
if (is_signed) { if (is_signed) {
lt = CompareSLT(lhs, rhs); Value* lt = CompareSLT(lhs, rhs);
gt = CompareSGT(lhs, rhs); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
Value* gt = CompareSGT(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
} else { } else {
lt = CompareULT(lhs, rhs); Value* lt = CompareULT(lhs, rhs);
gt = CompareUGT(lhs, rhs); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
Value* gt = CompareUGT(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
} }
Value* eq = CompareEQ(lhs, rhs); Value* eq = CompareEQ(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq);
// Value* so = AllocValue(UINT8_TYPE); // Value* so = AllocValue(UINT8_TYPE);
@ -280,7 +280,7 @@ Value* PPCHIRBuilder::LoadCA() {
} }
void PPCHIRBuilder::StoreCA(Value* value) { void PPCHIRBuilder::StoreCA(Value* value) {
value = Truncate(value, INT8_TYPE); XEASSERT(value->type == INT8_TYPE);
StoreContext(offsetof(PPCContext, xer_ca), value); StoreContext(offsetof(PPCContext, xer_ca), value);
} }

39
src/alloy/hir/block.cc Normal file
View File

@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/hir/block.h>
#include <alloy/hir/instr.h>
using namespace alloy;
using namespace alloy::hir;
// Debug check that this block's instruction list, followed through
// Instr::next starting at instr_head, terminates rather than looping back on
// itself. Trips XEASSERTALWAYS() when a cycle is found.
void Block::AssertNoCycles() {
  Instr* tortoise = instr_head;
  Instr* hare = instr_head;
  if (!hare) {
    // Empty list: nothing to walk.
    return;
  }
  // Tortoise-and-hare walk: per iteration the hare follows two links and the
  // tortoise one, so if the list loops the hare eventually lands on the
  // tortoise. Reaching the end of the list means there is no cycle.
  for (;;) {
    hare = hare->next;
    if (!hare) {
      return;
    }
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
      // Stop here: if the assertion does not halt execution, continuing the
      // walk over a cyclic list would never terminate.
      return;
    }
    hare = hare->next;
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
      return;
    }
    tortoise = tortoise->next;
    if (!hare || !tortoise) {
      return;
    }
  }
}

View File

@ -61,6 +61,8 @@ public:
Instr* instr_tail; Instr* instr_tail;
uint16_t ordinal; uint16_t ordinal;
void AssertNoCycles();
}; };

View File

@ -92,7 +92,7 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
case INT8_TYPE: str->Append("%X", value->constant.i8); break; case INT8_TYPE: str->Append("%X", value->constant.i8); break;
case INT16_TYPE: str->Append("%X", value->constant.i16); break; case INT16_TYPE: str->Append("%X", value->constant.i16); break;
case INT32_TYPE: str->Append("%X", value->constant.i32); break; case INT32_TYPE: str->Append("%X", value->constant.i32); break;
case INT64_TYPE: str->Append("%X", value->constant.i64); break; case INT64_TYPE: str->Append("%llX", value->constant.i64); break;
case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break; case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break;
case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break; case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break;
case VEC128_TYPE: str->Append("(%F,%F,%F,%F)", case VEC128_TYPE: str->Append("(%F,%F,%F,%F)",
@ -252,6 +252,29 @@ void HIRBuilder::Dump(StringBuffer* str) {
} }
} }
// Debug check that the builder's block list, followed through Block::next
// starting at block_head_, terminates rather than looping back on itself.
// Trips XEASSERTALWAYS() when a cycle is found.
void HIRBuilder::AssertNoCycles() {
  Block* tortoise = block_head_;
  Block* hare = block_head_;
  if (!hare) {
    // No blocks: nothing to walk.
    return;
  }
  // Tortoise-and-hare walk: per iteration the hare follows two links and the
  // tortoise one, so if the list loops the hare eventually lands on the
  // tortoise. Reaching the end of the list means there is no cycle.
  for (;;) {
    hare = hare->next;
    if (!hare) {
      return;
    }
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
      // Stop here: if the assertion does not halt execution, continuing the
      // walk over a cyclic list would never terminate.
      return;
    }
    hare = hare->next;
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
      return;
    }
    tortoise = tortoise->next;
    if (!hare || !tortoise) {
      return;
    }
  }
}
Block* HIRBuilder::current_block() const { Block* HIRBuilder::current_block() const {
return current_block_; return current_block_;
} }
@ -1729,16 +1752,19 @@ Value* HIRBuilder::Extract(Value* value, Value* index,
TypeName target_type) { TypeName target_type) {
// TODO(benvanik): could do some of this as constants. // TODO(benvanik): could do some of this as constants.
Value* trunc_index = index->type != INT8_TYPE ?
Truncate(index, INT8_TYPE) : index;
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_EXTRACT_info, 0, OPCODE_EXTRACT_info, 0,
AllocValue(target_type)); AllocValue(target_type));
i->set_src1(value); i->set_src1(value);
i->set_src2(ZeroExtend(index, INT64_TYPE)); i->set_src2(trunc_index);
i->src3.value = NULL; i->src3.value = NULL;
return i->dest; return i->dest;
} }
Value* HIRBuilder::Extract(Value* value, uint64_t index, Value* HIRBuilder::Extract(Value* value, uint8_t index,
TypeName target_type) { TypeName target_type) {
return Extract(value, LoadConstant(index), target_type); return Extract(value, LoadConstant(index), target_type);
} }

View File

@ -35,6 +35,7 @@ public:
virtual int Finalize(); virtual int Finalize();
void Dump(StringBuffer* str); void Dump(StringBuffer* str);
void AssertNoCycles();
Arena* arena() const { return arena_; } Arena* arena() const { return arena_; }
@ -196,7 +197,7 @@ public:
Value* Insert(Value* value, Value* index, Value* part); Value* Insert(Value* value, Value* index, Value* part);
Value* Insert(Value* value, uint64_t index, Value* part); Value* Insert(Value* value, uint64_t index, Value* part);
Value* Extract(Value* value, Value* index, TypeName target_type); Value* Extract(Value* value, Value* index, TypeName target_type);
Value* Extract(Value* value, uint64_t index, TypeName target_type); Value* Extract(Value* value, uint8_t index, TypeName target_type);
// i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...) // i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...)
// i8/i16/i32 -> vec128 // i8/i16/i32 -> vec128
Value* Splat(Value* value, TypeName target_type); Value* Splat(Value* value, TypeName target_type);

View File

@ -48,19 +48,6 @@ void Instr::set_src3(Value* value) {
src3_use = value ? value->AddUse(block->arena, this) : NULL; src3_use = value ? value->AddUse(block->arena, this) : NULL;
} }
bool Instr::Match(SignatureType dest_req,
SignatureType src1_req,
SignatureType src2_req,
SignatureType src3_req) const {
#define TO_SIG_TYPE(v) \
(v ? (v->IsConstant() ? SignatureType((v->type + 1) | SIG_TYPE_C) : SignatureType(v->type + 1)) : SIG_TYPE_X)
return
((dest_req == SIG_TYPE_IGNORE) || (dest_req == TO_SIG_TYPE(dest))) &&
((src1_req == SIG_TYPE_IGNORE) || (src1_req == TO_SIG_TYPE(src1.value))) &&
((src2_req == SIG_TYPE_IGNORE) || (src2_req == TO_SIG_TYPE(src2.value))) &&
((src3_req == SIG_TYPE_IGNORE) || (src3_req == TO_SIG_TYPE(src3.value)));
}
void Instr::MoveBefore(Instr* other) { void Instr::MoveBefore(Instr* other) {
if (next == other) { if (next == other) {
return; return;

View File

@ -24,26 +24,6 @@ namespace hir {
class Block; class Block;
class Label; class Label;
enum SignatureType {
SIG_TYPE_X = 0,
SIG_TYPE_I8 = 1,
SIG_TYPE_I16 = 2,
SIG_TYPE_I32 = 3,
SIG_TYPE_I64 = 4,
SIG_TYPE_F32 = 5,
SIG_TYPE_F64 = 6,
SIG_TYPE_V128 = 7,
SIG_TYPE_C = (1 << 3),
SIG_TYPE_I8C = SIG_TYPE_C | SIG_TYPE_I8,
SIG_TYPE_I16C = SIG_TYPE_C | SIG_TYPE_I16,
SIG_TYPE_I32C = SIG_TYPE_C | SIG_TYPE_I32,
SIG_TYPE_I64C = SIG_TYPE_C | SIG_TYPE_I64,
SIG_TYPE_F32C = SIG_TYPE_C | SIG_TYPE_F32,
SIG_TYPE_F64C = SIG_TYPE_C | SIG_TYPE_F64,
SIG_TYPE_V128C = SIG_TYPE_C | SIG_TYPE_V128,
SIG_TYPE_IGNORE = 0xFF,
};
class Instr { class Instr {
public: public:
Block* block; Block* block;
@ -74,11 +54,6 @@ public:
void set_src2(Value* value); void set_src2(Value* value);
void set_src3(Value* value); void set_src3(Value* value);
bool Match(SignatureType dest = SIG_TYPE_X,
SignatureType src1 = SIG_TYPE_X,
SignatureType src2 = SIG_TYPE_X,
SignatureType src3 = SIG_TYPE_X) const;
void MoveBefore(Instr* other); void MoveBefore(Instr* other);
void Replace(const OpcodeInfo* opcode, uint16_t flags); void Replace(const OpcodeInfo* opcode, uint16_t flags);
void Remove(); void Remove();

View File

@ -11,590 +11,590 @@
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMMENT, OPCODE_COMMENT,
"comment", "comment",
OPCODE_SIG_X, OPCODE_SIG_X_O,
OPCODE_FLAG_IGNORE); OPCODE_FLAG_IGNORE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NOP, OPCODE_NOP,
"nop", "nop",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_IGNORE); OPCODE_FLAG_IGNORE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SOURCE_OFFSET, OPCODE_SOURCE_OFFSET,
"source_offset", "source_offset",
OPCODE_SIG_X_O, OPCODE_SIG_X_O,
OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE); OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DEBUG_BREAK, OPCODE_DEBUG_BREAK,
"debug_break", "debug_break",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DEBUG_BREAK_TRUE, OPCODE_DEBUG_BREAK_TRUE,
"debug_break_true", "debug_break_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRAP, OPCODE_TRAP,
"trap", "trap",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRAP_TRUE, OPCODE_TRAP_TRUE,
"trap_true", "trap_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL, OPCODE_CALL,
"call", "call",
OPCODE_SIG_X_S, OPCODE_SIG_X_S,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_TRUE, OPCODE_CALL_TRUE,
"call_true", "call_true",
OPCODE_SIG_X_V_S, OPCODE_SIG_X_V_S,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_INDIRECT, OPCODE_CALL_INDIRECT,
"call_indirect", "call_indirect",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_INDIRECT_TRUE, OPCODE_CALL_INDIRECT_TRUE,
"call_indirect_true", "call_indirect_true",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_EXTERN, OPCODE_CALL_EXTERN,
"call_extern", "call_extern",
OPCODE_SIG_X_S, OPCODE_SIG_X_S,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RETURN, OPCODE_RETURN,
"return", "return",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RETURN_TRUE, OPCODE_RETURN_TRUE,
"return_true", "return_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SET_RETURN_ADDRESS, OPCODE_SET_RETURN_ADDRESS,
"set_return_address", "set_return_address",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH, OPCODE_BRANCH,
"branch", "branch",
OPCODE_SIG_X_L, OPCODE_SIG_X_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH_TRUE, OPCODE_BRANCH_TRUE,
"branch_true", "branch_true",
OPCODE_SIG_X_V_L, OPCODE_SIG_X_V_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH_FALSE, OPCODE_BRANCH_FALSE,
"branch_false", "branch_false",
OPCODE_SIG_X_V_L, OPCODE_SIG_X_V_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ASSIGN, OPCODE_ASSIGN,
"assign", "assign",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CAST, OPCODE_CAST,
"cast", "cast",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ZERO_EXTEND, OPCODE_ZERO_EXTEND,
"zero_extend", "zero_extend",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SIGN_EXTEND, OPCODE_SIGN_EXTEND,
"sign_extend", "sign_extend",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRUNCATE, OPCODE_TRUNCATE,
"truncate", "truncate",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CONVERT, OPCODE_CONVERT,
"convert", "convert",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ROUND, OPCODE_ROUND,
"round", "round",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_I2F, OPCODE_VECTOR_CONVERT_I2F,
"vector_convert_i2f", "vector_convert_i2f",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_F2I, OPCODE_VECTOR_CONVERT_F2I,
"vector_convert_f2i", "vector_convert_f2i",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHL, OPCODE_LOAD_VECTOR_SHL,
"load_vector_shl", "load_vector_shl",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHR, OPCODE_LOAD_VECTOR_SHR,
"load_vector_shr", "load_vector_shr",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_CLOCK, OPCODE_LOAD_CLOCK,
"load_clock", "load_clock",
OPCODE_SIG_V, OPCODE_SIG_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_LOCAL, OPCODE_LOAD_LOCAL,
"load_local", "load_local",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE_LOCAL, OPCODE_STORE_LOCAL,
"store_local", "store_local",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_CONTEXT, OPCODE_LOAD_CONTEXT,
"load_context", "load_context",
OPCODE_SIG_V_O, OPCODE_SIG_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE_CONTEXT, OPCODE_STORE_CONTEXT,
"store_context", "store_context",
OPCODE_SIG_X_O_V, OPCODE_SIG_X_O_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD, OPCODE_LOAD,
"load", "load",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE, OPCODE_STORE,
"store", "store",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PREFETCH, OPCODE_PREFETCH,
"prefetch", "prefetch",
OPCODE_SIG_X_V_O, OPCODE_SIG_X_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MAX, OPCODE_MAX,
"max", "max",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MIN, OPCODE_MIN,
"min", "min",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SELECT, OPCODE_SELECT,
"select", "select",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_IS_TRUE, OPCODE_IS_TRUE,
"is_true", "is_true",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_IS_FALSE, OPCODE_IS_FALSE,
"is_false", "is_false",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_EQ, OPCODE_COMPARE_EQ,
"compare_eq", "compare_eq",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_NE, OPCODE_COMPARE_NE,
"compare_ne", "compare_ne",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SLT, OPCODE_COMPARE_SLT,
"compare_slt", "compare_slt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SLE, OPCODE_COMPARE_SLE,
"compare_sle", "compare_sle",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SGT, OPCODE_COMPARE_SGT,
"compare_sgt", "compare_sgt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SGE, OPCODE_COMPARE_SGE,
"compare_sge", "compare_sge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_ULT, OPCODE_COMPARE_ULT,
"compare_ult", "compare_ult",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_ULE, OPCODE_COMPARE_ULE,
"compare_ule", "compare_ule",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_UGT, OPCODE_COMPARE_UGT,
"compare_ugt", "compare_ugt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_UGE, OPCODE_COMPARE_UGE,
"compare_uge", "compare_uge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DID_CARRY, OPCODE_DID_CARRY,
"did_carry", "did_carry",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_PAIRED_PREV); OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DID_OVERFLOW, OPCODE_DID_OVERFLOW,
"did_overflow", "did_overflow",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_PAIRED_PREV); OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DID_SATURATE, OPCODE_DID_SATURATE,
"did_saturate", "did_saturate",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_PAIRED_PREV); OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_EQ, OPCODE_VECTOR_COMPARE_EQ,
"vector_compare_eq", "vector_compare_eq",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_SGT, OPCODE_VECTOR_COMPARE_SGT,
"vector_compare_sgt", "vector_compare_sgt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_SGE, OPCODE_VECTOR_COMPARE_SGE,
"vector_compare_sge", "vector_compare_sge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_UGT, OPCODE_VECTOR_COMPARE_UGT,
"vector_compare_ugt", "vector_compare_ugt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_UGE, OPCODE_VECTOR_COMPARE_UGE,
"vector_compare_uge", "vector_compare_uge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ADD, OPCODE_ADD,
"add", "add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ADD_CARRY, OPCODE_ADD_CARRY,
"add_carry", "add_carry",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_ADD, OPCODE_VECTOR_ADD,
"vector_add", "vector_add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SUB, OPCODE_SUB,
"sub", "sub",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL, OPCODE_MUL,
"mul", "mul",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_HI, OPCODE_MUL_HI,
"mul_hi", "mul_hi",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DIV, OPCODE_DIV,
"div", "div",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_ADD, OPCODE_MUL_ADD,
"mul_add", "mul_add",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_SUB, OPCODE_MUL_SUB,
"mul_sub", "mul_sub",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NEG, OPCODE_NEG,
"neg", "neg",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ABS, OPCODE_ABS,
"abs", "abs",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SQRT, OPCODE_SQRT,
"sqrt", "sqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RSQRT, OPCODE_RSQRT,
"rsqrt", "rsqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_POW2, OPCODE_POW2,
"pow2", "pow2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOG2, OPCODE_LOG2,
"log2", "log2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_3, OPCODE_DOT_PRODUCT_3,
"dot_product_3", "dot_product_3",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_4, OPCODE_DOT_PRODUCT_4,
"dot_product_4", "dot_product_4",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_AND, OPCODE_AND,
"and", "and",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_OR, OPCODE_OR,
"or", "or",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_XOR, OPCODE_XOR,
"xor", "xor",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NOT, OPCODE_NOT,
"not", "not",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHL, OPCODE_SHL,
"shl", "shl",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHL, OPCODE_VECTOR_SHL,
"vector_shl", "vector_shl",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHR, OPCODE_SHR,
"shr", "shr",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHR, OPCODE_VECTOR_SHR,
"vector_shr", "vector_shr",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHA, OPCODE_SHA,
"sha", "sha",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHA, OPCODE_VECTOR_SHA,
"vector_sha", "vector_sha",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ROTATE_LEFT, OPCODE_ROTATE_LEFT,
"rotate_left", "rotate_left",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BYTE_SWAP, OPCODE_BYTE_SWAP,
"byte_swap", "byte_swap",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CNTLZ, OPCODE_CNTLZ,
"cntlz", "cntlz",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_INSERT, OPCODE_INSERT,
"insert", "insert",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_EXTRACT, OPCODE_EXTRACT,
"extract", "extract",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SPLAT, OPCODE_SPLAT,
"splat", "splat",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PERMUTE, OPCODE_PERMUTE,
"permute", "permute",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SWIZZLE, OPCODE_SWIZZLE,
"swizzle", "swizzle",
OPCODE_SIG_V_V_O, OPCODE_SIG_V_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PACK, OPCODE_PACK,
"pack", "pack",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_UNPACK, OPCODE_UNPACK,
"unpack", "unpack",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_EXCHANGE, OPCODE_COMPARE_EXCHANGE,
"compare_exchange", "compare_exchange",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_EXCHANGE, OPCODE_ATOMIC_EXCHANGE,
"atomic_exchange", "atomic_exchange",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_ADD, OPCODE_ATOMIC_ADD,
"atomic_add", "atomic_add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_SUB, OPCODE_ATOMIC_SUB,
"atomic_sub", "atomic_sub",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)

View File

@ -1,6 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved. # Copyright 2013 Ben Vanik. All Rights Reserved.
{ {
'sources': [ 'sources': [
'block.cc',
'block.h', 'block.h',
'hir_builder.cc', 'hir_builder.cc',
'hir_builder.h', 'hir_builder.h',

View File

@ -560,6 +560,26 @@ void Value::ByteSwap() {
} }
} }
// Computes the number of leading zero bits of the constant `src` and stores
// the count in constant.i8. Which member of `src` is read (i8/i16/i32/i64) is
// selected by this value's `type`; any other type trips XEASSERTALWAYS().
void Value::CountLeadingZeros(const ConstantValue& src) {
  switch (type) {
    case INT8_TYPE:
      // There is no 8-bit intrinsic, so count over 16 bits and drop the 8
      // extra high bits. The byte must be zero-extended first: passing the
      // signed i8 directly sign-extends negative values to 0xFFxx, which makes
      // the intrinsic return 0 and the adjusted result -8.
      constant.i8 = __lzcnt16(static_cast<uint8_t>(src.i8)) - 8;
      break;
    case INT16_TYPE:
      constant.i8 = __lzcnt16(src.i16);
      break;
    case INT32_TYPE:
      constant.i8 = __lzcnt(src.i32);
      break;
    case INT64_TYPE:
      constant.i8 = __lzcnt64(src.i64);
      break;
    default:
      // Leading-zero count is only handled for the integer types above.
      XEASSERTALWAYS();
      break;
  }
}
bool Value::Compare(Opcode opcode, Value* other) { bool Value::Compare(Opcode opcode, Value* other) {
// TODO(benvanik): big matrix. // TODO(benvanik): big matrix.
XEASSERTALWAYS(); XEASSERTALWAYS();

View File

@ -68,6 +68,10 @@ enum ValueFlags {
VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set. VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set.
}; };
// Register assignment record: a backend register set paired with an index.
// Named type for what was previously an anonymous struct on Value::reg.
struct RegAssignment {
// Register set the assignment refers to.
const backend::MachineInfo::RegisterSet* set;
// Presumably the register's index within `set` -- TODO confirm against the
// backend code that fills this in.
int32_t index;
};
class Value { class Value {
public: public:
@ -91,10 +95,7 @@ public:
TypeName type; TypeName type;
uint32_t flags; uint32_t flags;
struct { RegAssignment reg;
const backend::MachineInfo::RegisterSet* set;
int32_t index;
} reg;
ConstantValue constant; ConstantValue constant;
Instr* def; Instr* def;
@ -392,6 +393,7 @@ public:
void Shr(Value* other); void Shr(Value* other);
void Sha(Value* other); void Sha(Value* other);
void ByteSwap(); void ByteSwap();
void CountLeadingZeros(const ConstantValue& src);
bool Compare(Opcode opcode, Value* other); bool Compare(Opcode opcode, Value* other);
}; };

2
third_party/xbyak vendored

@ -1 +1 @@
Subproject commit 702d6e6683c322f08a36ea059f6d6f8263b1bd0d Subproject commit 2d599b3bd64a6d13c8b47a5f7410c67837bfff5d

View File

@ -24,6 +24,18 @@
'target_arch%': 'x64', 'target_arch%': 'x64',
}, },
'conditions': [
['OS=="win"', {
'variables': {
'move_command%': 'move'
},
}, {
'variables': {
'move_command%': 'mv'
},
}]
],
'target_defaults': { 'target_defaults': {
'include_dirs': [ 'include_dirs': [
'include/', 'include/',
@ -255,6 +267,7 @@
'include_dirs': [ 'include_dirs': [
'.', '.',
'src/', 'src/',
'<(INTERMEDIATE_DIR)',
], ],
'includes': [ 'includes': [