Trying out a new style of JIT pattern matching.
This commit is contained in:
parent
a001714fb0
commit
5a85263e5f
|
@ -40,10 +40,10 @@ namespace ivm {
|
|||
#define DPRINT
|
||||
#define DFLUSH()
|
||||
|
||||
//#define IPRINT if (ics.thread_state->thread_id() == 1) printf
|
||||
//#define IFLUSH() fflush(stdout)
|
||||
//#define DPRINT if (ics.thread_state->thread_id() == 1) printf
|
||||
//#define DFLUSH() fflush(stdout)
|
||||
#define IPRINT if (ics.thread_state->thread_id() == 1) printf
|
||||
#define IFLUSH() fflush(stdout)
|
||||
#define DPRINT if (ics.thread_state->thread_id() == 1) printf
|
||||
#define DFLUSH() fflush(stdout)
|
||||
|
||||
#if XE_CPU_BIGENDIAN
|
||||
#define VECB16(v,n) (v.b16[n])
|
||||
|
@ -1364,31 +1364,31 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
|
|||
}
|
||||
|
||||
uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64));
|
||||
ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32));
|
||||
return IA_NEXT;
|
||||
}
|
||||
int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
|
||||
|
@ -1405,31 +1405,31 @@ int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
|
|||
}
|
||||
|
||||
uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
|
||||
*((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8;
|
||||
*((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i8;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
|
||||
*((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16;
|
||||
*((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i16;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
|
||||
*((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32;
|
||||
*((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i32;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
|
||||
*((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64;
|
||||
*((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i64;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
|
||||
*((float*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32;
|
||||
*((float*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f32;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
|
||||
*((double*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64;
|
||||
*((double*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f64;
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
|
||||
*((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].v128;
|
||||
*((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].v128;
|
||||
return IA_NEXT;
|
||||
}
|
||||
int Translate_STORE_LOCAL(TranslationContext& ctx, Instr* i) {
|
||||
|
@ -3715,17 +3715,17 @@ int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
|
|||
|
||||
uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) {
|
||||
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
||||
ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i64);
|
||||
ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i8);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) {
|
||||
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
||||
ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i64);
|
||||
ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i8);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) {
|
||||
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
||||
ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i64);
|
||||
ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i8);
|
||||
return IA_NEXT;
|
||||
}
|
||||
int Translate_EXTRACT(TranslationContext& ctx, Instr* i) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,71 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/backend/x64/lowering/lowering_table.h>
|
||||
|
||||
#include <alloy/backend/x64/x64_emitter.h>
|
||||
#include <alloy/backend/x64/lowering/lowering_sequences.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend::x64;
|
||||
using namespace alloy::backend::x64::lowering;
|
||||
|
||||
|
||||
LoweringTable::LoweringTable(X64Backend* backend) :
|
||||
backend_(backend) {
|
||||
xe_zero_struct(lookup_, sizeof(lookup_));
|
||||
}
|
||||
|
||||
LoweringTable::~LoweringTable() {
|
||||
for (size_t n = 0; n < XECOUNT(lookup_); n++) {
|
||||
auto entry = lookup_[n];
|
||||
while (entry) {
|
||||
auto next = entry->next;
|
||||
delete entry;
|
||||
entry = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int LoweringTable::Initialize() {
|
||||
RegisterSequences(this);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void LoweringTable::AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn) {
|
||||
auto existing_entry = lookup_[starting_opcode];
|
||||
auto new_entry = new sequence_fn_entry_t();
|
||||
new_entry->fn = fn;
|
||||
new_entry->next = existing_entry;
|
||||
lookup_[starting_opcode] = new_entry;
|
||||
}
|
||||
|
||||
int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) {
|
||||
// Process instructions.
|
||||
auto instr = block->instr_head;
|
||||
while (instr) {
|
||||
bool processed = false;
|
||||
auto entry = lookup_[instr->opcode->num];
|
||||
while (entry) {
|
||||
if ((*entry->fn)(e, instr)) {
|
||||
processed = true;
|
||||
break;
|
||||
}
|
||||
entry = entry->next;
|
||||
}
|
||||
if (!processed) {
|
||||
// No sequence found!
|
||||
XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
|
||||
return 1;
|
||||
instr = e.Advance(instr);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
|
||||
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
|
||||
|
||||
#include <alloy/core.h>
|
||||
#include <alloy/hir/hir_builder.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
class X64Backend;
|
||||
class X64Emitter;
|
||||
namespace lowering {
|
||||
|
||||
|
||||
class LoweringTable {
|
||||
public:
|
||||
LoweringTable(X64Backend* backend);
|
||||
~LoweringTable();
|
||||
|
||||
int Initialize();
|
||||
|
||||
int ProcessBlock(X64Emitter& e, hir::Block* block);
|
||||
|
||||
public:
|
||||
typedef bool(*sequence_fn_t)(X64Emitter& e, hir::Instr*& instr);
|
||||
void AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn);
|
||||
|
||||
private:
|
||||
class sequence_fn_entry_t {
|
||||
public:
|
||||
sequence_fn_t fn;
|
||||
sequence_fn_entry_t* next;
|
||||
};
|
||||
|
||||
// NOTE: this class is shared by multiple threads and is not thread safe.
|
||||
// Do not modify anything after init.
|
||||
X64Backend* backend_;
|
||||
sequence_fn_entry_t* lookup_[hir::__OPCODE_MAX_VALUE];
|
||||
};
|
||||
|
||||
|
||||
} // namespace lowering
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -1,12 +0,0 @@
|
|||
# Copyright 2013 Ben Vanik. All Rights Reserved.
|
||||
{
|
||||
'sources': [
|
||||
'lowering_sequences.cc',
|
||||
'lowering_sequences.h',
|
||||
'lowering_table.cc',
|
||||
'lowering_table.h',
|
||||
'op_utils.inl',
|
||||
'tracers.cc',
|
||||
'tracers.h',
|
||||
],
|
||||
}
|
|
@ -12,11 +12,12 @@
|
|||
'x64_emitter.h',
|
||||
'x64_function.cc',
|
||||
'x64_function.h',
|
||||
'x64_sequence.inl',
|
||||
'x64_sequences.cc',
|
||||
'x64_sequences.h',
|
||||
'x64_thunk_emitter.cc',
|
||||
'x64_thunk_emitter.h',
|
||||
],
|
||||
|
||||
'includes': [
|
||||
'lowering/sources.gypi',
|
||||
'x64_tracers.cc',
|
||||
'x64_tracers.h',
|
||||
],
|
||||
}
|
||||
|
|
|
@ -12,26 +12,23 @@
|
|||
#include <alloy/backend/x64/tracing.h>
|
||||
#include <alloy/backend/x64/x64_assembler.h>
|
||||
#include <alloy/backend/x64/x64_code_cache.h>
|
||||
#include <alloy/backend/x64/x64_sequences.h>
|
||||
#include <alloy/backend/x64/x64_thunk_emitter.h>
|
||||
#include <alloy/backend/x64/lowering/lowering_table.h>
|
||||
#include <alloy/backend/x64/lowering/lowering_sequences.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::backend::x64;
|
||||
using namespace alloy::backend::x64::lowering;
|
||||
using namespace alloy::runtime;
|
||||
|
||||
|
||||
X64Backend::X64Backend(Runtime* runtime) :
|
||||
code_cache_(0), lowering_table_(0),
|
||||
code_cache_(0),
|
||||
Backend(runtime) {
|
||||
}
|
||||
|
||||
X64Backend::~X64Backend() {
|
||||
alloy::tracing::WriteEvent(EventType::Deinit({
|
||||
}));
|
||||
delete lowering_table_;
|
||||
delete code_cache_;
|
||||
}
|
||||
|
||||
|
@ -41,6 +38,8 @@ int X64Backend::Initialize() {
|
|||
return result;
|
||||
}
|
||||
|
||||
RegisterSequences();
|
||||
|
||||
machine_info_.register_sets[0] = {
|
||||
0,
|
||||
"gpr",
|
||||
|
@ -68,9 +67,6 @@ int X64Backend::Initialize() {
|
|||
delete thunk_emitter;
|
||||
delete allocator;
|
||||
|
||||
lowering_table_ = new LoweringTable(this);
|
||||
RegisterSequences(lowering_table_);
|
||||
|
||||
alloy::tracing::WriteEvent(EventType::Init({
|
||||
}));
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@ namespace backend {
|
|||
namespace x64 {
|
||||
|
||||
class X64CodeCache;
|
||||
namespace lowering { class LoweringTable; }
|
||||
|
||||
|
||||
#define ALLOY_HAS_X64_BACKEND 1
|
||||
|
@ -38,8 +37,6 @@ public:
|
|||
HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
|
||||
GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
|
||||
|
||||
lowering::LoweringTable* lowering_table() const { return lowering_table_; }
|
||||
|
||||
virtual int Initialize();
|
||||
|
||||
virtual Assembler* CreateAssembler();
|
||||
|
@ -48,8 +45,6 @@ private:
|
|||
X64CodeCache* code_cache_;
|
||||
HostToGuestThunk host_to_guest_thunk_;
|
||||
GuestToHostThunk guest_to_host_thunk_;
|
||||
|
||||
lowering::LoweringTable* lowering_table_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -11,10 +11,14 @@
|
|||
|
||||
#include <alloy/backend/x64/x64_backend.h>
|
||||
#include <alloy/backend/x64/x64_code_cache.h>
|
||||
#include <alloy/backend/x64/x64_function.h>
|
||||
#include <alloy/backend/x64/x64_sequences.h>
|
||||
#include <alloy/backend/x64/x64_thunk_emitter.h>
|
||||
#include <alloy/backend/x64/lowering/lowering_table.h>
|
||||
#include <alloy/hir/hir_builder.h>
|
||||
#include <alloy/runtime/debug_info.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
#include <alloy/runtime/symbol_info.h>
|
||||
#include <alloy/runtime/thread_state.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
|
@ -31,6 +35,13 @@ namespace x64 {
|
|||
|
||||
static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
static const size_t STASH_OFFSET = 32;
|
||||
|
||||
// If we are running with tracing on we have to store the EFLAGS in the stack,
|
||||
// otherwise our calls out to C to print will clear it before DID_CARRY/etc
|
||||
// can get the value.
|
||||
#define STORE_EFLAGS 1
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace alloy
|
||||
|
@ -145,12 +156,9 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
|||
mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
|
||||
mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
|
||||
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
|
||||
// ReloadRDX:
|
||||
mov(rdx, qword[rcx + 8]); // membase
|
||||
}
|
||||
|
||||
auto lowering_table = backend_->lowering_table();
|
||||
|
||||
// Body.
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
|
@ -161,12 +169,17 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
|||
label = label->next;
|
||||
}
|
||||
|
||||
// Add instructions.
|
||||
// The table will process sequences of instructions to (try to)
|
||||
// generate optimal code.
|
||||
current_instr_ = block->instr_head;
|
||||
if (lowering_table->ProcessBlock(*this, block)) {
|
||||
return 1;
|
||||
// Process instructions.
|
||||
const Instr* instr = block->instr_head;
|
||||
while (instr) {
|
||||
const Instr* new_tail = instr;
|
||||
if (!SelectSequence(*this, instr, &new_tail)) {
|
||||
// No sequence found!
|
||||
XEASSERTALWAYS();
|
||||
XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
|
||||
break;
|
||||
}
|
||||
instr = new_tail;
|
||||
}
|
||||
|
||||
block = block->next;
|
||||
|
@ -191,16 +204,320 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
Instr* X64Emitter::Advance(Instr* i) {
|
||||
auto next = i->next;
|
||||
current_instr_ = next;
|
||||
return next;
|
||||
}
|
||||
|
||||
void X64Emitter::MarkSourceOffset(Instr* i) {
|
||||
void X64Emitter::MarkSourceOffset(const Instr* i) {
|
||||
auto entry = source_map_arena_.Alloc<SourceMapEntry>();
|
||||
entry->source_offset = i->src1.offset;
|
||||
entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
|
||||
entry->code_offset = getSize();
|
||||
source_map_count_++;
|
||||
}
|
||||
|
||||
void X64Emitter::DebugBreak() {
|
||||
// TODO(benvanik): notify debugger.
|
||||
db(0xCC);
|
||||
}
|
||||
|
||||
void X64Emitter::Trap() {
|
||||
// TODO(benvanik): notify debugger.
|
||||
db(0xCC);
|
||||
}
|
||||
|
||||
void X64Emitter::UnimplementedInstr(const hir::Instr* i) {
|
||||
// TODO(benvanik): notify debugger.
|
||||
db(0xCC);
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
|
||||
uint64_t ResolveFunctionSymbol(void* raw_context, uint64_t symbol_info_ptr) {
|
||||
// TODO(benvanik): generate this thunk at runtime? or a shim?
|
||||
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
|
||||
auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
|
||||
|
||||
Function* fn = NULL;
|
||||
thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
|
||||
XEASSERTNOTNULL(fn);
|
||||
auto x64_fn = static_cast<X64Function*>(fn);
|
||||
return reinterpret_cast<uint64_t>(x64_fn->machine_code());
|
||||
}
|
||||
|
||||
void X64Emitter::Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info) {
|
||||
auto fn = reinterpret_cast<X64Function*>(symbol_info->function());
|
||||
// Resolve address to the function to call and store in rax.
|
||||
// TODO(benvanik): caching/etc. For now this makes debugging easier.
|
||||
if (fn) {
|
||||
mov(rax, reinterpret_cast<uint64_t>(fn->machine_code()));
|
||||
} else {
|
||||
CallNative(ResolveFunctionSymbol, reinterpret_cast<uint64_t>(symbol_info));
|
||||
}
|
||||
|
||||
// Actually jump/call to rax.
|
||||
if (instr->flags & CALL_TAIL) {
|
||||
// Pass the callers return address over.
|
||||
mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
|
||||
|
||||
add(rsp, static_cast<uint32_t>(stack_size()));
|
||||
jmp(rax);
|
||||
} else {
|
||||
// Return address is from the previous SET_RETURN_ADDRESS.
|
||||
mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
|
||||
call(rax);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t ResolveFunctionAddress(void* raw_context, uint64_t target_address) {
|
||||
// TODO(benvanik): generate this thunk at runtime? or a shim?
|
||||
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
|
||||
|
||||
// TODO(benvanik): required?
|
||||
target_address &= 0xFFFFFFFF;
|
||||
|
||||
Function* fn = NULL;
|
||||
thread_state->runtime()->ResolveFunction(target_address, &fn);
|
||||
XEASSERTNOTNULL(fn);
|
||||
auto x64_fn = static_cast<X64Function*>(fn);
|
||||
return reinterpret_cast<uint64_t>(x64_fn->machine_code());
|
||||
}
|
||||
|
||||
void X64Emitter::CallIndirect(const hir::Instr* instr, const Reg64& reg) {
|
||||
// Check if return.
|
||||
if (instr->flags & CALL_POSSIBLE_RETURN) {
|
||||
cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
|
||||
je("epilog", CodeGenerator::T_NEAR);
|
||||
}
|
||||
|
||||
// Resolve address to the function to call and store in rax.
|
||||
// TODO(benvanik): caching/etc. For now this makes debugging easier.
|
||||
if (reg.getIdx() != rdx.getIdx()) {
|
||||
mov(rdx, reg);
|
||||
}
|
||||
CallNative(ResolveFunctionAddress);
|
||||
|
||||
// Actually jump/call to rax.
|
||||
if (instr->flags & CALL_TAIL) {
|
||||
// Pass the callers return address over.
|
||||
mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
|
||||
|
||||
add(rsp, static_cast<uint32_t>(stack_size()));
|
||||
jmp(rax);
|
||||
} else {
|
||||
// Return address is from the previous SET_RETURN_ADDRESS.
|
||||
mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
|
||||
call(rax);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t UndefinedCallExtern(void* raw_context, uint64_t symbol_info_ptr) {
|
||||
auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
|
||||
XELOGW("undefined extern call to %.8X %s",
|
||||
symbol_info->address(),
|
||||
symbol_info->name());
|
||||
return 0;
|
||||
}
|
||||
void X64Emitter::CallExtern(const hir::Instr* instr, const FunctionInfo* symbol_info) {
|
||||
XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN);
|
||||
if (!symbol_info->extern_handler()) {
|
||||
CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(symbol_info));
|
||||
} else {
|
||||
// rcx = context
|
||||
// rdx = target host function
|
||||
// r8 = arg0
|
||||
// r9 = arg1
|
||||
mov(rdx, reinterpret_cast<uint64_t>(symbol_info->extern_handler()));
|
||||
mov(r8, reinterpret_cast<uint64_t>(symbol_info->extern_arg0()));
|
||||
mov(r9, reinterpret_cast<uint64_t>(symbol_info->extern_arg1()));
|
||||
auto thunk = backend()->guest_to_host_thunk();
|
||||
mov(rax, reinterpret_cast<uint64_t>(thunk));
|
||||
call(rax);
|
||||
ReloadECX();
|
||||
ReloadEDX();
|
||||
// rax = host return
|
||||
}
|
||||
}
|
||||
|
||||
void X64Emitter::CallNative(void* fn) {
|
||||
mov(rax, reinterpret_cast<uint64_t>(fn));
|
||||
call(rax);
|
||||
ReloadECX();
|
||||
ReloadEDX();
|
||||
}
|
||||
|
||||
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context)) {
|
||||
mov(rax, reinterpret_cast<uint64_t>(fn));
|
||||
call(rax);
|
||||
ReloadECX();
|
||||
ReloadEDX();
|
||||
}
|
||||
|
||||
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)) {
|
||||
mov(rax, reinterpret_cast<uint64_t>(fn));
|
||||
call(rax);
|
||||
ReloadECX();
|
||||
ReloadEDX();
|
||||
}
|
||||
|
||||
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) {
|
||||
mov(rdx, arg0);
|
||||
mov(rax, reinterpret_cast<uint64_t>(fn));
|
||||
call(rax);
|
||||
ReloadECX();
|
||||
ReloadEDX();
|
||||
}
|
||||
|
||||
void X64Emitter::SetReturnAddress(uint64_t value) {
|
||||
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], value);
|
||||
}
|
||||
|
||||
void X64Emitter::ReloadECX() {
|
||||
mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
|
||||
}
|
||||
|
||||
void X64Emitter::ReloadEDX() {
|
||||
mov(rdx, qword[rcx + 8]); // membase
|
||||
}
|
||||
|
||||
void X64Emitter::LoadEflags() {
|
||||
#if STORE_EFLAGS
|
||||
mov(eax, dword[rsp + STASH_OFFSET]);
|
||||
push(rax);
|
||||
popf();
|
||||
#else
|
||||
// EFLAGS already present.
|
||||
#endif // STORE_EFLAGS
|
||||
}
|
||||
|
||||
void X64Emitter::StoreEflags() {
|
||||
#if STORE_EFLAGS
|
||||
pushf();
|
||||
pop(qword[rsp + STASH_OFFSET]);
|
||||
#else
|
||||
// EFLAGS should have CA set?
|
||||
// (so long as we don't fuck with it)
|
||||
#endif // STORE_EFLAGS
|
||||
}
|
||||
|
||||
bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
// Fits under 31 bits, so just load using normal mov.
|
||||
return true;
|
||||
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
|
||||
// Negative number that fits in 32bits.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void X64Emitter::MovMem64(const RegExp& addr, uint64_t v) {
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
// Fits under 31 bits, so just load using normal mov.
|
||||
mov(qword[addr], v);
|
||||
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
|
||||
// Negative number that fits in 32bits.
|
||||
mov(qword[addr], v);
|
||||
} else if (!(v >> 32)) {
|
||||
// All high bits are zero. It'd be nice if we had a way to load a 32bit
|
||||
// immediate without sign extending!
|
||||
// TODO(benvanik): this is super common, find a better way.
|
||||
mov(dword[addr], static_cast<uint32_t>(v));
|
||||
mov(dword[addr + 4], 0);
|
||||
} else {
|
||||
// 64bit number that needs double movs.
|
||||
mov(dword[addr], static_cast<uint32_t>(v));
|
||||
mov(dword[addr + 4], static_cast<uint32_t>(v >> 32));
|
||||
}
|
||||
}
|
||||
|
||||
Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||
static const vec128_t xmm_consts[] = {
|
||||
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
|
||||
/* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f),
|
||||
/* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
|
||||
/* XMMMaskX16Y16 */ vec128i(0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000),
|
||||
/* XMMFlipX16Y16 */ vec128i(0x00008000, 0x00000000, 0x00000000, 0x00000000),
|
||||
/* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
|
||||
/* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
|
||||
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
||||
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
||||
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
||||
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
|
||||
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
|
||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||
/* XMMOneMask */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
};
|
||||
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
||||
// prevent this move.
|
||||
// TODO(benvanik): move to predictable location in PPCContext? could then
|
||||
// just do rcx relative addression with no rax overwriting.
|
||||
mov(rax, (uint64_t)&xmm_consts[id]);
|
||||
return ptr[rax];
|
||||
}
|
||||
|
||||
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
|
||||
// http://www.agner.org/optimize/optimizing_assembly.pdf
|
||||
// 13.4 Generating constants
|
||||
if (!v.low && !v.high) {
|
||||
// 0000...
|
||||
vpxor(dest, dest);
|
||||
} else if (v.low == ~0ull && v.high == ~0ull) {
|
||||
// 1111...
|
||||
vmovaps(dest, GetXmmConstPtr(XMMOneMask));
|
||||
} else {
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
MovMem64(rsp + STASH_OFFSET, v.low);
|
||||
MovMem64(rsp + STASH_OFFSET + 8, v.high);
|
||||
vmovdqa(dest, ptr[rsp + STASH_OFFSET]);
|
||||
}
|
||||
}
|
||||
|
||||
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) {
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} x = { v };
|
||||
if (!v) {
|
||||
// 0
|
||||
vpxor(dest, dest);
|
||||
} else if (x.i == ~0UL) {
|
||||
// 1111...
|
||||
vmovaps(dest, GetXmmConstPtr(XMMOneMask));
|
||||
} else {
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
mov(eax, x.i);
|
||||
vmovd(dest, eax);
|
||||
}
|
||||
}
|
||||
|
||||
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) {
|
||||
union {
|
||||
double d;
|
||||
uint64_t i;
|
||||
} x = { v };
|
||||
if (!v) {
|
||||
// 0
|
||||
vpxor(dest, dest);
|
||||
} else if (x.i == ~0ULL) {
|
||||
// 1111...
|
||||
vmovaps(dest, GetXmmConstPtr(XMMOneMask));
|
||||
} else {
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
mov(rax, x.i);
|
||||
vmovq(dest, rax);
|
||||
}
|
||||
}
|
||||
|
||||
Address X64Emitter::StashXmm(const Xmm& r) {
|
||||
auto addr = ptr[rsp + STASH_OFFSET];
|
||||
vmovups(addr, r);
|
||||
return addr;
|
||||
}
|
||||
|
||||
Address X64Emitter::StashXmm(const vec128_t& v) {
|
||||
auto addr = ptr[rsp + STASH_OFFSET];
|
||||
LoadConstantXmm(xmm0, v);
|
||||
vmovups(addr, xmm0);
|
||||
return addr;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
XEDECLARECLASS2(alloy, hir, HIRBuilder);
|
||||
XEDECLARECLASS2(alloy, hir, Instr);
|
||||
XEDECLARECLASS2(alloy, runtime, DebugInfo);
|
||||
XEDECLARECLASS2(alloy, runtime, FunctionInfo);
|
||||
XEDECLARECLASS2(alloy, runtime, Runtime);
|
||||
XEDECLARECLASS2(alloy, runtime, SymbolInfo);
|
||||
|
||||
namespace alloy {
|
||||
namespace backend {
|
||||
|
@ -33,6 +35,25 @@ enum RegisterFlags {
|
|||
REG_ABCD = (1 << 1),
|
||||
};
|
||||
|
||||
enum XmmConst {
|
||||
XMMZero = 0,
|
||||
XMMOne = 1,
|
||||
XMMNegativeOne = 2,
|
||||
XMMMaskX16Y16 = 3,
|
||||
XMMFlipX16Y16 = 4,
|
||||
XMMFixX16Y16 = 5,
|
||||
XMMNormalizeX16Y16 = 6,
|
||||
XMM3301 = 7,
|
||||
XMMSignMaskPS = 8,
|
||||
XMMSignMaskPD = 9,
|
||||
XMMByteSwapMask = 10,
|
||||
XMMPermuteControl15 = 11,
|
||||
XMMUnpackD3DCOLOR = 12,
|
||||
XMMOneOver255 = 13,
|
||||
XMMShiftMaskPS = 14,
|
||||
XMMOneMask = 15,
|
||||
};
|
||||
|
||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||
class XbyakAllocator : public Xbyak::Allocator {
|
||||
public:
|
||||
|
@ -54,79 +75,68 @@ public:
|
|||
void*& out_code_address, size_t& out_code_size);
|
||||
|
||||
public:
|
||||
template<typename V0>
|
||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
|
||||
SetupReg(v0, r0);
|
||||
}
|
||||
template<typename V0, typename V1>
|
||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||
hir::Value* v1, V1& r1, uint32_t r1_flags) {
|
||||
SetupReg(v0, r0);
|
||||
SetupReg(v1, r1);
|
||||
}
|
||||
template<typename V0, typename V1, typename V2>
|
||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
||||
hir::Value* v2, V2& r2, uint32_t r2_flags) {
|
||||
SetupReg(v0, r0);
|
||||
SetupReg(v1, r1);
|
||||
SetupReg(v2, r2);
|
||||
}
|
||||
template<typename V0, typename V1, typename V2, typename V3>
|
||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
||||
hir::Value* v2, V2& r2, uint32_t r2_flags,
|
||||
hir::Value* v3, V3& r3, uint32_t r3_flags) {
|
||||
SetupReg(v0, r0);
|
||||
SetupReg(v1, r1);
|
||||
SetupReg(v2, r2);
|
||||
SetupReg(v3, r3);
|
||||
}
|
||||
template<typename V0>
|
||||
void EndOp(V0& r0) {
|
||||
}
|
||||
template<typename V0, typename V1>
|
||||
void EndOp(V0& r0, V1& r1) {
|
||||
}
|
||||
template<typename V0, typename V1, typename V2>
|
||||
void EndOp(V0& r0, V1& r1, V2& r2) {
|
||||
}
|
||||
template<typename V0, typename V1, typename V2, typename V3>
|
||||
void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
|
||||
}
|
||||
|
||||
// Reserved: rsp
|
||||
// Scratch: rax/rcx/rdx
|
||||
// xmm0-1
|
||||
// xmm0-2 (could be only xmm0 with some trickery)
|
||||
// Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?)
|
||||
// xmm6-xmm15 (save to get xmm2-xmm5)
|
||||
// xmm6-xmm15 (save to get xmm3-xmm5)
|
||||
static const int GPR_COUNT = 5;
|
||||
static const int XMM_COUNT = 10;
|
||||
|
||||
static void SetupReg(hir::Value* v, Xbyak::Reg8& r) {
|
||||
static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) {
|
||||
auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = Xbyak::Reg8(idx);
|
||||
}
|
||||
static void SetupReg(hir::Value* v, Xbyak::Reg16& r) {
|
||||
static void SetupReg(const hir::Value* v, Xbyak::Reg16& r) {
|
||||
auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = Xbyak::Reg16(idx);
|
||||
}
|
||||
static void SetupReg(hir::Value* v, Xbyak::Reg32& r) {
|
||||
static void SetupReg(const hir::Value* v, Xbyak::Reg32& r) {
|
||||
auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = Xbyak::Reg32(idx);
|
||||
}
|
||||
static void SetupReg(hir::Value* v, Xbyak::Reg64& r) {
|
||||
static void SetupReg(const hir::Value* v, Xbyak::Reg64& r) {
|
||||
auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = Xbyak::Reg64(idx);
|
||||
}
|
||||
static void SetupReg(hir::Value* v, Xbyak::Xmm& r) {
|
||||
static void SetupReg(const hir::Value* v, Xbyak::Xmm& r) {
|
||||
auto idx = xmm_reg_map_[v->reg.index];
|
||||
r = Xbyak::Xmm(idx);
|
||||
}
|
||||
|
||||
hir::Instr* Advance(hir::Instr* i);
|
||||
void MarkSourceOffset(const hir::Instr* i);
|
||||
|
||||
void MarkSourceOffset(hir::Instr* i);
|
||||
void DebugBreak();
|
||||
void Trap();
|
||||
void UnimplementedInstr(const hir::Instr* i);
|
||||
void UnimplementedExtern(const hir::Instr* i);
|
||||
|
||||
void Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info);
|
||||
void CallIndirect(const hir::Instr* instr, const Xbyak::Reg64& reg);
|
||||
void CallExtern(const hir::Instr* instr, const runtime::FunctionInfo* symbol_info);
|
||||
void CallNative(void* fn);
|
||||
void CallNative(uint64_t(*fn)(void* raw_context));
|
||||
void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0));
|
||||
void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0);
|
||||
void SetReturnAddress(uint64_t value);
|
||||
void ReloadECX();
|
||||
void ReloadEDX();
|
||||
|
||||
// TODO(benvanik): Label for epilog (don't use strings).
|
||||
|
||||
void LoadEflags();
|
||||
void StoreEflags();
|
||||
|
||||
// Moves a 64bit immediate into memory.
|
||||
bool ConstantFitsIn32Reg(uint64_t v);
|
||||
void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
|
||||
|
||||
Xbyak::Address GetXmmConstPtr(XmmConst id);
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, float v);
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, double v);
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
|
||||
Xbyak::Address StashXmm(const Xbyak::Xmm& r);
|
||||
Xbyak::Address StashXmm(const vec128_t& v);
|
||||
|
||||
size_t stack_size() const { return stack_size_; }
|
||||
|
||||
|
|
|
@ -0,0 +1,714 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
enum KeyType {
|
||||
KEY_TYPE_X = OPCODE_SIG_TYPE_X,
|
||||
KEY_TYPE_L = OPCODE_SIG_TYPE_L,
|
||||
KEY_TYPE_O = OPCODE_SIG_TYPE_O,
|
||||
KEY_TYPE_S = OPCODE_SIG_TYPE_S,
|
||||
KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
|
||||
KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
|
||||
KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
|
||||
KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
|
||||
KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
|
||||
KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
|
||||
KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union InstrKey {
|
||||
struct {
|
||||
uint32_t opcode : 8;
|
||||
uint32_t dest : 5;
|
||||
uint32_t src1 : 5;
|
||||
uint32_t src2 : 5;
|
||||
uint32_t src3 : 5;
|
||||
uint32_t reserved : 4;
|
||||
};
|
||||
uint32_t value;
|
||||
|
||||
operator uint32_t() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
InstrKey() : value(0) {}
|
||||
InstrKey(uint32_t v) : value(v) {}
|
||||
InstrKey(const Instr* i) : value(0) {
|
||||
opcode = i->opcode->num;
|
||||
uint32_t sig = i->opcode->signature;
|
||||
dest = GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
|
||||
src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
|
||||
if (src1 == OPCODE_SIG_TYPE_V) {
|
||||
src1 += i->src1.value->type;
|
||||
}
|
||||
src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
|
||||
if (src2 == OPCODE_SIG_TYPE_V) {
|
||||
src2 += i->src2.value->type;
|
||||
}
|
||||
src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
|
||||
if (src3 == OPCODE_SIG_TYPE_V) {
|
||||
src3 += i->src3.value->type;
|
||||
}
|
||||
}
|
||||
|
||||
template <Opcode OPCODE,
|
||||
KeyType DEST = KEY_TYPE_X,
|
||||
KeyType SRC1 = KEY_TYPE_X,
|
||||
KeyType SRC2 = KEY_TYPE_X,
|
||||
KeyType SRC3 = KEY_TYPE_X>
|
||||
struct Construct {
|
||||
static const uint32_t value =
|
||||
(OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
|
||||
|
||||
template <typename... Ts>
|
||||
struct CombinedStruct;
|
||||
template <>
|
||||
struct CombinedStruct<> {};
|
||||
template <typename T, typename... Ts>
|
||||
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};
|
||||
|
||||
struct OpBase {};
|
||||
|
||||
template <typename T, KeyType KEY_TYPE>
|
||||
struct Op : OpBase {
|
||||
static const KeyType key_type = KEY_TYPE;
|
||||
};
|
||||
|
||||
struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
|
||||
protected:
|
||||
template <typename T, KeyType KEY_TYPE> friend struct Op;
|
||||
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
|
||||
void Load(const Instr::Op& op) {}
|
||||
};
|
||||
|
||||
struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
|
||||
uint64_t value;
|
||||
protected:
|
||||
template <typename T, KeyType KEY_TYPE> friend struct Op;
|
||||
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
|
||||
void Load(const Instr::Op& op) {
|
||||
this->value = op.offset;
|
||||
}
|
||||
};
|
||||
|
||||
struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
|
||||
FunctionInfo* value;
|
||||
protected:
|
||||
template <typename T, KeyType KEY_TYPE> friend struct Op;
|
||||
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
|
||||
bool Load(const Instr::Op& op) {
|
||||
this->value = op.symbol_info;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
|
||||
hir::Label* value;
|
||||
protected:
|
||||
template <typename T, KeyType KEY_TYPE> friend struct Op;
|
||||
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
|
||||
void Load(const Instr::Op& op) {
|
||||
this->value = op.label;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE, int TAG = -1>
|
||||
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE, TAG>, KEY_TYPE> {
|
||||
typedef REG_TYPE reg_type;
|
||||
static const int tag = TAG;
|
||||
const Value* value;
|
||||
bool is_constant;
|
||||
virtual bool ConstantFitsIn32Reg() const { return true; }
|
||||
const REG_TYPE& reg() const {
|
||||
XEASSERT(!is_constant);
|
||||
return reg_;
|
||||
}
|
||||
operator const REG_TYPE&() const {
|
||||
return reg();
|
||||
}
|
||||
bool IsEqual(const T& b) const {
|
||||
if (is_constant && b.is_constant) {
|
||||
return reinterpret_cast<const T*>(this)->constant() == b.constant();
|
||||
} else if (!is_constant && !b.is_constant) {
|
||||
return reg_.getIdx() == b.reg_.getIdx();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool IsEqual(const Xbyak::Reg& b) const {
|
||||
if (is_constant) {
|
||||
return false;
|
||||
} else if (!is_constant) {
|
||||
return reg_.getIdx() == b.getIdx();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool operator== (const T& b) const {
|
||||
return IsEqual(b);
|
||||
}
|
||||
bool operator!= (const T& b) const {
|
||||
return !IsEqual(b);
|
||||
}
|
||||
bool operator== (const Xbyak::Reg& b) const {
|
||||
return IsEqual(b);
|
||||
}
|
||||
bool operator!= (const Xbyak::Reg& b) const {
|
||||
return !IsEqual(b);
|
||||
}
|
||||
void Load(const Instr::Op& op) {
|
||||
const Value* value = op.value;
|
||||
this->value = value;
|
||||
is_constant = value->IsConstant();
|
||||
if (!is_constant) {
|
||||
X64Emitter::SetupReg(value, reg_);
|
||||
}
|
||||
}
|
||||
protected:
|
||||
REG_TYPE reg_;
|
||||
};
|
||||
|
||||
template <int TAG = -1>
|
||||
struct I8 : ValueOp<I8<TAG>, KEY_TYPE_V_I8, Reg8, int8_t, TAG> {
|
||||
const int8_t constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.i8;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct I16 : ValueOp<I16<TAG>, KEY_TYPE_V_I16, Reg16, int16_t, TAG> {
|
||||
const int16_t constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.i16;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct I32 : ValueOp<I32<TAG>, KEY_TYPE_V_I32, Reg32, int32_t, TAG> {
|
||||
const int32_t constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.i32;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct I64 : ValueOp<I64<TAG>, KEY_TYPE_V_I64, Reg64, int64_t, TAG> {
|
||||
const int64_t constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.i64;
|
||||
}
|
||||
bool ConstantFitsIn32Reg() const override {
|
||||
int64_t v = value->constant.i64;
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
// Fits under 31 bits, so just load using normal mov.
|
||||
return true;
|
||||
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
|
||||
// Negative number that fits in 32bits.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct F32 : ValueOp<F32<TAG>, KEY_TYPE_V_F32, Xmm, float, TAG> {
|
||||
const float constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.f32;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct F64 : ValueOp<F64<TAG>, KEY_TYPE_V_F64, Xmm, double, TAG> {
|
||||
const double constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.f64;
|
||||
}
|
||||
};
|
||||
template <int TAG = -1>
|
||||
struct V128 : ValueOp<V128<TAG>, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> {
|
||||
const vec128_t& constant() const {
|
||||
XEASSERT(is_constant);
|
||||
return value->constant.v128;
|
||||
}
|
||||
};
|
||||
|
||||
struct TagTable {
|
||||
struct {
|
||||
bool valid;
|
||||
Instr::Op op;
|
||||
} table[16];
|
||||
|
||||
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_X>::type* = nullptr>
|
||||
bool CheckTag(const Instr::Op& op) {
|
||||
return true;
|
||||
}
|
||||
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_L>::type* = nullptr>
|
||||
bool CheckTag(const Instr::Op& op) {
|
||||
return true;
|
||||
}
|
||||
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_O>::type* = nullptr>
|
||||
bool CheckTag(const Instr::Op& op) {
|
||||
return true;
|
||||
}
|
||||
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_S>::type* = nullptr>
|
||||
bool CheckTag(const Instr::Op& op) {
|
||||
return true;
|
||||
}
|
||||
template <typename T, typename std::enable_if<T::key_type >= KEY_TYPE_V_I8>::type* = nullptr>
|
||||
bool CheckTag(const Instr::Op& op) {
|
||||
const Value* value = op.value;
|
||||
if (T::tag == -1) {
|
||||
return true;
|
||||
}
|
||||
if (table[T::tag].valid &&
|
||||
table[T::tag].op.value != value) {
|
||||
return false;
|
||||
}
|
||||
table[T::tag].valid = true;
|
||||
table[T::tag].op.value = (Value*)value;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename DEST, typename... Tf>
|
||||
struct DestField;
|
||||
template <typename DEST>
|
||||
struct DestField<DEST> {
|
||||
DEST dest;
|
||||
protected:
|
||||
bool LoadDest(const Instr* i, TagTable& tag_table) {
|
||||
Instr::Op op;
|
||||
op.value = i->dest;
|
||||
if (tag_table.CheckTag<DEST>(op)) {
|
||||
dest.Load(op);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <>
|
||||
struct DestField<VoidOp> {
|
||||
protected:
|
||||
bool LoadDest(const Instr* i, TagTable& tag_table) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
struct I;
|
||||
template <hir::Opcode OPCODE, typename DEST>
|
||||
struct I<OPCODE, DEST> : DestField<DEST> {
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
const Instr* instr;
|
||||
protected:
|
||||
template <typename... Ti> friend struct SequenceFields;
|
||||
bool Load(const Instr* i, TagTable& tag_table) {
|
||||
if (InstrKey(i).value == key &&
|
||||
LoadDest(i, tag_table)) {
|
||||
instr = i;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
|
||||
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1;
|
||||
protected:
|
||||
template <typename... Ti> friend struct SequenceFields;
|
||||
bool Load(const Instr* i, TagTable& tag_table) {
|
||||
if (InstrKey(i).value == key &&
|
||||
LoadDest(i, tag_table) &&
|
||||
tag_table.CheckTag<SRC1>(i->src1)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
|
||||
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
static const KeyType src2_type = SRC2::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1;
|
||||
SRC2 src2;
|
||||
protected:
|
||||
template <typename... Ti> friend struct SequenceFields;
|
||||
bool Load(const Instr* i, TagTable& tag_table) {
|
||||
if (InstrKey(i).value == key &&
|
||||
LoadDest(i, tag_table) &&
|
||||
tag_table.CheckTag<SRC1>(i->src1) &&
|
||||
tag_table.CheckTag<SRC2>(i->src2)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
src2.Load(i->src2);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2, typename SRC3>
|
||||
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type, SRC3::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
static const KeyType src2_type = SRC2::key_type;
|
||||
static const KeyType src3_type = SRC3::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1;
|
||||
SRC2 src2;
|
||||
SRC3 src3;
|
||||
protected:
|
||||
template <typename... Ti> friend struct SequenceFields;
|
||||
bool Load(const Instr* i, TagTable& tag_table) {
|
||||
if (InstrKey(i).value == key &&
|
||||
LoadDest(i, tag_table) &&
|
||||
tag_table.CheckTag<SRC1>(i->src1) &&
|
||||
tag_table.CheckTag<SRC2>(i->src2) &&
|
||||
tag_table.CheckTag<SRC3>(i->src3)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
src2.Load(i->src2);
|
||||
src3.Load(i->src3);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename... Ti>
|
||||
struct SequenceFields;
|
||||
template <typename I1>
|
||||
struct SequenceFields<I1> {
|
||||
I1 i1;
|
||||
typedef typename I1 I1Type;
|
||||
protected:
|
||||
template <typename SEQ, typename... Ti> friend struct Sequence;
|
||||
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
|
||||
if (i1.Load(i, tag_table)) {
|
||||
*new_tail = i->next;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename I1, typename I2>
|
||||
struct SequenceFields<I1, I2> : SequenceFields<I1> {
|
||||
I2 i2;
|
||||
protected:
|
||||
template <typename SEQ, typename... Ti> friend struct Sequence;
|
||||
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
|
||||
if (SequenceFields<I1>::Check(i, tag_table, new_tail)) {
|
||||
auto ni = i->next;
|
||||
if (ni && i2.Load(ni, tag_table)) {
|
||||
*new_tail = ni;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename I1, typename I2, typename I3>
|
||||
struct SequenceFields<I1, I2, I3> : SequenceFields<I1, I2> {
|
||||
I3 i3;
|
||||
protected:
|
||||
template <typename SEQ, typename... Ti> friend struct Sequence;
|
||||
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
|
||||
if (SequenceFields<I1, I2>::Check(i, tag_table, new_tail)) {
|
||||
auto ni = i->next;
|
||||
if (ni && i3.Load(ni, tag_table)) {
|
||||
*new_tail = ni;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename I1, typename I2, typename I3, typename I4>
|
||||
struct SequenceFields<I1, I2, I3, I4> : SequenceFields<I1, I2, I3> {
|
||||
I4 i4;
|
||||
protected:
|
||||
template <typename SEQ, typename... Ti> friend struct Sequence;
|
||||
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
|
||||
if (SequenceFields<I1, I2, I3>::Check(i, tag_table, new_tail)) {
|
||||
auto ni = i->next;
|
||||
if (ni && i4.Load(ni, tag_table)) {
|
||||
*new_tail = ni;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename I1, typename I2, typename I3, typename I4, typename I5>
|
||||
struct SequenceFields<I1, I2, I3, I4, I5> : SequenceFields<I1, I2, I3, I4> {
|
||||
I5 i5;
|
||||
protected:
|
||||
template <typename SEQ, typename... Ti> friend struct Sequence;
|
||||
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
|
||||
if (SequenceFields<I1, I2, I3, I4>::Check(i, tag_table, new_tail)) {
|
||||
auto ni = i->next;
|
||||
if (ni && i5.Load(ni, tag_table)) {
|
||||
*new_tail = ni;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename SEQ, typename... Ti>
|
||||
struct Sequence {
|
||||
struct EmitArgs : SequenceFields<Ti...> {};
|
||||
|
||||
static bool Select(X64Emitter& e, const Instr* i, const Instr** new_tail) {
|
||||
EmitArgs args;
|
||||
TagTable tag_table;
|
||||
if (!args.Check(i, tag_table, new_tail)) {
|
||||
return false;
|
||||
}
|
||||
SEQ::Emit(e, args);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
const T GetTempReg(X64Emitter& e);
|
||||
template <>
|
||||
const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
|
||||
return e.al;
|
||||
}
|
||||
template <>
|
||||
const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
|
||||
return e.ax;
|
||||
}
|
||||
template <>
|
||||
const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
|
||||
return e.eax;
|
||||
}
|
||||
template <>
|
||||
const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
|
||||
return e.rax;
|
||||
}
|
||||
|
||||
template <typename SEQ, typename T>
|
||||
struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
|
||||
typedef T EmitArgType;
|
||||
static const uint32_t head_key = T::key;
|
||||
static void Emit(X64Emitter& e, const EmitArgs& _) {
|
||||
SEQ::Emit(e, _.i1);
|
||||
}
|
||||
|
||||
template <typename REG_FN>
|
||||
static void EmitUnaryOp(
|
||||
X64Emitter& e, const EmitArgType& i,
|
||||
const REG_FN& reg_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
e.mov(i.dest, i.src1.constant());
|
||||
reg_fn(e, i.dest);
|
||||
} else {
|
||||
if (i.dest != i.src1) {
|
||||
e.mov(i.dest, i.src1);
|
||||
}
|
||||
reg_fn(e, i.dest);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitCommutativeBinaryOp(
|
||||
X64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
XEASSERT(!i.src2.is_constant);
|
||||
if (i.dest == i.src2) {
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
|
||||
e.mov(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.dest == i.src1) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, i.src1);
|
||||
}
|
||||
} else {
|
||||
if (i.dest == i.src1) {
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
} else if (i.dest == i.src2) {
|
||||
reg_reg_fn(e, i.dest, i.src1);
|
||||
} else {
|
||||
e.mov(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitAssociativeBinaryOp(
|
||||
X64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
XEASSERT(!i.src2.is_constant);
|
||||
if (i.dest == i.src2) {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2);
|
||||
e.mov(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
} else {
|
||||
e.mov(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.dest == i.src1) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, i.src1);
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (i.dest == i.src1) {
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
} else if (i.dest == i.src2) {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2);
|
||||
e.mov(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
} else {
|
||||
e.mov(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitCommutativeCompareOp(
|
||||
X64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
XEASSERT(!i.src2.is_constant);
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
|
||||
e.mov(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.src2, temp);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.src1, temp);
|
||||
}
|
||||
} else {
|
||||
reg_reg_fn(e, i.src1, i.src2);
|
||||
}
|
||||
}
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitAssociativeCompareOp(
|
||||
X64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
XEASSERT(!i.src2.is_constant);
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()), true);
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
|
||||
e.mov(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2, temp, true);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()), false);
|
||||
} else {
|
||||
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
|
||||
e.mov(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, i.src1, temp, false);
|
||||
}
|
||||
} else {
|
||||
reg_reg_fn(e, i.dest, i.src1, i.src2, false);
|
||||
}
|
||||
}
|
||||
};

static const int ANY = -1;
typedef int tag_t;
static const tag_t TAG0 = 0;
static const tag_t TAG1 = 1;
static const tag_t TAG2 = 2;
static const tag_t TAG3 = 3;
static const tag_t TAG4 = 4;
static const tag_t TAG5 = 5;
static const tag_t TAG6 = 6;
static const tag_t TAG7 = 7;

typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**);

template <typename T>
void Register() {
sequence_table.insert({ T::head_key, T::Select });
}
template <typename T, typename Tn, typename... Ts>
void Register() {
Register<T>();
Register<Tn, Ts...>();
};
#define EMITTER_OPCODE_TABLE(name, ...) \
void Register_##name() { \
Register<__VA_ARGS__>(); \
}

#define MATCH(...) __VA_ARGS__
#define EMITTER(name, match) struct name : SingleSequence<name, match>
#define SEQUENCE(name, match) struct name : Sequence<name, match>

} // namespace
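(Aside: a hedged sketch of the intended usage of these macros: define an emitter against a matcher expression, then register it per opcode. The matcher inside MATCH() and the Emit() entry point are assumptions about the SingleSequence<> machinery defined earlier in this file; ASSIGN_I32 is an illustrative name.)

// Hypothetical emitter definition and registration.
EMITTER(ASSIGN_I32, MATCH(I<OPCODE_ASSIGN, I32<>, I32<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.mov(i.dest, i.src1);  // straight register-to-register move
  }
};
// Expands to void Register_OPCODE_ASSIGN() { Register<ASSIGN_I32>(); },
// which inserts { ASSIGN_I32::head_key, ASSIGN_I32::Select } into sequence_table.
EMITTER_OPCODE_TABLE(OPCODE_ASSIGN, ASSIGN_I32);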
@ -2,32 +2,32 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#ifndef ALLOY_BACKEND_X64_X64_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_SEQUENCES_H_

#include <alloy/core.h>
#include <alloy/hir/instr.h>

XEDECLARECLASS2(alloy, hir, Instr);

namespace alloy {
namespace backend {
namespace x64 {
namespace lowering {

class LoweringTable;

void RegisterSequences(LoweringTable* table);
class X64Emitter;

void RegisterSequences();
bool SelectSequence(X64Emitter& e, const hir::Instr* i, const hir::Instr** new_tail);

} // namespace lowering
} // namespace x64
} // namespace backend
} // namespace alloy

#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#endif // ALLOY_BACKEND_X64_X64_SEQUENCES_H_
@ -7,7 +7,7 @@
******************************************************************************
*/

#include <alloy/backend/x64/lowering/tracers.h>
#include <alloy/backend/x64/x64_tracers.h>

#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/runtime/runtime.h>

@ -15,19 +15,14 @@

using namespace alloy;
using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
using namespace alloy::runtime;

namespace alloy {
namespace backend {
namespace x64 {
namespace lowering {

#define IFLUSH()
#define IPRINT
#define DFLUSH()
#define DPRINT
#define ITRACE 0
#define DTRACE 0

#define TARGET_THREAD 1

@ -36,6 +31,16 @@ namespace lowering {
#define DFLUSH() fflush(stdout)
#define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf

uint32_t GetTracingMode() {
uint32_t mode = 0;
#if ITRACE
mode |= TRACING_INSTR;
#endif // ITRACE
#if DTRACE
mode |= TRACING_DATA;
#endif // DTRACE
return mode;
}

void TraceString(void* raw_context, const char* str) {
auto thread_state = *((ThreadState**)raw_context);

@ -190,7 +195,6 @@ void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
}

} // namespace lowering
} // namespace x64
} // namespace backend
} // namespace alloy
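(Aside: a minimal sketch of consuming the tracing switches above; TraceIfEnabled and annotation are illustrative names, and only GetTracingMode()/IsTracingInstr()/TraceString() from x64_tracers.h are assumed.)

// Hypothetical helper: only forward the string when instruction tracing
// was compiled in via ITRACE.
void TraceIfEnabled(void* raw_context, const char* annotation) {
  if (IsTracingInstr()) {
    TraceString(raw_context, annotation);
  }
}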
@ -7,8 +7,8 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_
|
||||
#define ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_
|
||||
#ifndef ALLOY_BACKEND_X64_X64_TRACERS_H_
|
||||
#define ALLOY_BACKEND_X64_X64_TRACERS_H_
|
||||
|
||||
#include <alloy/core.h>
|
||||
|
||||
|
@ -33,7 +33,15 @@ namespace alloy {
|
|||
namespace backend {
|
||||
namespace x64 {
|
||||
class X64Emitter;
|
||||
namespace lowering {
|
||||
|
||||
enum TracingMode {
|
||||
TRACING_INSTR = (1 << 1),
|
||||
TRACING_DATA = (1 << 2),
|
||||
};
|
||||
|
||||
uint32_t GetTracingMode();
|
||||
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
|
||||
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }
|
||||
|
||||
void TraceString(void* raw_context, const char* str);
|
||||
|
||||
|
@ -69,10 +77,9 @@ void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
|
|||
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
|
||||
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);
|
||||
|
||||
} // namespace lowering
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_
|
||||
#endif // ALLOY_BACKEND_X64_X64_TRACERS_H_
|
|
@ -368,6 +368,13 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_CNTLZ:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
v->set_zero(v->type);
|
||||
v->CountLeadingZeros(i->src1.value->constant);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
// TODO(benvanik): INSERT/EXTRACT
|
||||
// TODO(benvanik): SPLAT/PERMUTE/SWIZZLE
|
||||
case OPCODE_SPLAT:
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include <alloy/compiler/passes/context_promotion_pass.h>
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include <alloy/compiler/compiler.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
|
@ -20,6 +22,10 @@ using namespace alloy::hir;
|
|||
using namespace alloy::runtime;
|
||||
|
||||
|
||||
DEFINE_bool(store_all_context_values, false,
|
||||
"Don't strip dead context stores to aid in debugging.");
|
||||
|
||||
|
||||
ContextPromotionPass::ContextPromotionPass() :
|
||||
context_values_size_(0), context_values_(0),
|
||||
CompilerPass() {
|
||||
|
@ -69,10 +75,12 @@ int ContextPromotionPass::Run(HIRBuilder* builder) {
|
|||
}
|
||||
|
||||
// Remove all dead stores.
|
||||
block = builder->first_block();
|
||||
while (block) {
|
||||
RemoveDeadStoresBlock(block);
|
||||
block = block->next;
|
||||
if (!FLAGS_store_all_context_values) {
|
||||
block = builder->first_block();
|
||||
while (block) {
|
||||
RemoveDeadStoresBlock(block);
|
||||
block = block->next;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -13,12 +13,6 @@
|
|||
#include <alloy/compiler/compiler.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4244)
|
||||
#pragma warning(disable : 4267)
|
||||
#include <llvm/ADT/BitVector.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
|
|
|
@ -36,8 +36,6 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() {
|
|||
}
|
||||
|
||||
int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||
auto arena = builder->arena();
|
||||
|
||||
// Linearize blocks so that we can detect cycles and propagate dependencies.
|
||||
uint32_t block_count = LinearizeBlocks(builder);
|
||||
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
|
@ -16,180 +18,135 @@ using namespace alloy::compiler::passes;
|
|||
using namespace alloy::hir;
|
||||
|
||||
|
||||
struct RegisterAllocationPass::Interval {
|
||||
uint32_t start_ordinal;
|
||||
uint32_t end_ordinal;
|
||||
Value* value;
|
||||
RegisterFreeUntilSet* free_until_set;
|
||||
// TODO(benvanik): reduce to offsets in arena?
|
||||
struct Interval* next;
|
||||
struct Interval* prev;
|
||||
#define ASSERT_NO_CYCLES 0
|
||||
|
||||
void AddToList(Interval** list_head) {
|
||||
auto list_next = *list_head;
|
||||
this->next = list_next;
|
||||
if (list_next) {
|
||||
list_next->prev = this;
|
||||
}
|
||||
*list_head = this;
|
||||
}
|
||||
|
||||
void InsertIntoList(Interval** list_head) {
|
||||
auto it = *list_head;
|
||||
while (it) {
|
||||
if (it->start_ordinal > this->start_ordinal) {
|
||||
// Went too far. Insert before this interval.
|
||||
this->prev = it->prev;
|
||||
this->next = it;
|
||||
if (it->prev) {
|
||||
it->prev->next = this;
|
||||
} else {
|
||||
*list_head = this;
|
||||
}
|
||||
it->prev = this;
|
||||
return;
|
||||
}
|
||||
if (!it->next) {
|
||||
// None found, add at tail.
|
||||
it->next = this;
|
||||
this->prev = it;
|
||||
return;
|
||||
}
|
||||
it = it->next;
|
||||
}
|
||||
}
|
||||
|
||||
void RemoveFromList(Interval** list_head) {
|
||||
if (this->next) {
|
||||
this->next->prev = this->prev;
|
||||
}
|
||||
if (this->prev) {
|
||||
this->prev->next = this->next;
|
||||
} else {
|
||||
*list_head = this->next;
|
||||
}
|
||||
this->next = this->prev = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
struct RegisterAllocationPass::Intervals {
|
||||
Interval* unhandled;
|
||||
Interval* active;
|
||||
Interval* handled;
|
||||
};
|
||||
|
||||
RegisterAllocationPass::RegisterAllocationPass(
|
||||
const MachineInfo* machine_info) :
|
||||
machine_info_(machine_info),
|
||||
CompilerPass() {
|
||||
// Initialize register sets. The values of these will be
|
||||
// cleared before use, so just the structure is required.
|
||||
// Initialize register sets.
|
||||
// TODO(benvanik): rewrite in a way that makes sense - this is terrible.
|
||||
auto mi_sets = machine_info->register_sets;
|
||||
xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_));
|
||||
xe_zero_struct(&usage_sets_, sizeof(usage_sets_));
|
||||
uint32_t n = 0;
|
||||
while (mi_sets[n].count) {
|
||||
auto& mi_set = mi_sets[n];
|
||||
auto free_until_set = new RegisterFreeUntilSet();
|
||||
free_until_sets_.all_sets[n] = free_until_set;
|
||||
free_until_set->count = mi_set.count;
|
||||
free_until_set->set = &mi_set;
|
||||
auto usage_set = new RegisterSetUsage();
|
||||
usage_sets_.all_sets[n] = usage_set;
|
||||
usage_set->count = mi_set.count;
|
||||
usage_set->set = &mi_set;
|
||||
if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
|
||||
free_until_sets_.int_set = free_until_set;
|
||||
usage_sets_.int_set = usage_set;
|
||||
}
|
||||
if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
|
||||
free_until_sets_.float_set = free_until_set;
|
||||
usage_sets_.float_set = usage_set;
|
||||
}
|
||||
if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
|
||||
free_until_sets_.vec_set = free_until_set;
|
||||
usage_sets_.vec_set = usage_set;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
RegisterAllocationPass::~RegisterAllocationPass() {
|
||||
for (size_t n = 0; n < XECOUNT(free_until_sets_.all_sets); n++) {
|
||||
if (!free_until_sets_.all_sets[n]) {
|
||||
for (size_t n = 0; n < XECOUNT(usage_sets_.all_sets); n++) {
|
||||
if (!usage_sets_.all_sets[n]) {
|
||||
break;
|
||||
}
|
||||
delete free_until_sets_.all_sets[n];
|
||||
delete usage_sets_.all_sets[n];
|
||||
}
|
||||
}
|
||||
|
||||
int RegisterAllocationPass::Run(HIRBuilder* builder) {
|
||||
// A (probably broken) implementation of a linear scan register allocator
|
||||
// that operates directly on SSA form:
|
||||
// http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf
|
||||
//
|
||||
// Requirements:
|
||||
// - SSA form (single definition for variables)
|
||||
// - block should be in linear order:
|
||||
// - dominators *should* come before (a->b->c)
|
||||
// - loop block sequences *should not* have intervening non-loop blocks
|
||||
// Simple per-block allocator that operates on SSA form.
|
||||
// Registers do not move across blocks, though this could be
|
||||
// optimized with some intra-block analysis (dominators/etc).
|
||||
// Really, it'd just be nice to have someone who knew what they
|
||||
// were doing lower SSA and do this right.
|
||||
|
||||
auto arena = scratch_arena();
|
||||
|
||||
// Renumber everything.
|
||||
uint32_t block_ordinal = 0;
|
||||
uint32_t instr_ordinal = 0;
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
// Sequential block ordinals.
|
||||
block->ordinal = block_ordinal++;
|
||||
|
||||
// Reset all state.
|
||||
PrepareBlockState();
|
||||
|
||||
// Renumber all instructions in the block. This is required so that
|
||||
// we can sort the usage pointers below.
|
||||
auto instr = block->instr_head;
|
||||
while (instr) {
|
||||
// Sequential global instruction ordinals.
|
||||
instr->ordinal = instr_ordinal++;
|
||||
instr = instr->next;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
// Compute all liveness ranges by walking forward through all
|
||||
// blocks/instructions and checking the last use of each value. This lets
|
||||
// us know the exact order in (block#,instr#) form, which is then used to
|
||||
// setup the range.
|
||||
// TODO(benvanik): ideally we would have a list of all values and not have
|
||||
// to keep walking instructions over and over.
|
||||
Interval* prev_interval = NULL;
|
||||
Interval* head_interval = NULL;
|
||||
block = builder->first_block();
|
||||
while (block) {
|
||||
auto instr = block->instr_head;
|
||||
instr = block->instr_head;
|
||||
while (instr) {
|
||||
// Compute last-use for the dest value.
|
||||
// Since we know all values of importance must be defined, we can avoid
|
||||
// having to check every value and just look at dest.
|
||||
const OpcodeInfo* info = instr->opcode;
|
||||
if (GET_OPCODE_SIG_TYPE_DEST(info->signature) == OPCODE_SIG_TYPE_V) {
|
||||
auto v = instr->dest;
|
||||
if (!v->last_use) {
|
||||
ComputeLastUse(v);
|
||||
}
|
||||
uint32_t signature = info->signature;
|
||||
|
||||
// Add interval.
|
||||
auto interval = arena->Alloc<Interval>();
|
||||
interval->start_ordinal = instr->ordinal;
|
||||
interval->end_ordinal = v->last_use ?
|
||||
v->last_use->ordinal : v->def->ordinal;
|
||||
interval->value = v;
|
||||
interval->next = NULL;
|
||||
interval->prev = prev_interval;
|
||||
if (prev_interval) {
|
||||
prev_interval->next = interval;
|
||||
} else {
|
||||
head_interval = interval;
|
||||
}
|
||||
prev_interval = interval;
|
||||
// Update the register use heaps.
|
||||
AdvanceUses(instr);
|
||||
|
||||
// Grab register set to use.
|
||||
// We do this now so it's only once per interval, and it makes it easy
|
||||
// to only compare intervals that overlap their sets.
|
||||
if (v->type <= INT64_TYPE) {
|
||||
interval->free_until_set = free_until_sets_.int_set;
|
||||
} else if (v->type <= FLOAT64_TYPE) {
|
||||
interval->free_until_set = free_until_sets_.float_set;
|
||||
// Check sources for retirement. If any are unused after this instruction
|
||||
// we can eagerly evict them to speed up register allocation.
|
||||
// Since X64 (and other platforms) can often take advantage of dest==src1
|
||||
// register mappings we track retired src1 so that we can attempt to
|
||||
// reuse it.
|
||||
// NOTE: these checks require that the usage list be sorted!
|
||||
bool has_preferred_reg = false;
|
||||
RegAssignment preferred_reg = { 0 };
|
||||
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V &&
|
||||
!instr->src1.value->IsConstant()) {
|
||||
if (!instr->src1_use->next) {
|
||||
// Pull off preferred register. We will try to reuse this for the
|
||||
// dest.
|
||||
has_preferred_reg = true;
|
||||
preferred_reg = instr->src1.value->reg;
|
||||
XEASSERTNOTNULL(preferred_reg.set);
|
||||
}
|
||||
}
|
||||
|
||||
if (GET_OPCODE_SIG_TYPE_DEST(signature) == OPCODE_SIG_TYPE_V) {
|
||||
// Must not have been set already.
|
||||
XEASSERTNULL(instr->dest->reg.set);
|
||||
|
||||
// Sort the usage list. We depend on this in future uses of this variable.
|
||||
SortUsageList(instr->dest);
|
||||
|
||||
// If we have a preferred register, use that.
|
||||
// This way we can help along the stupid X86 two opcode instructions.
|
||||
bool allocated;
|
||||
if (has_preferred_reg) {
|
||||
// Allocate with the given preferred register. If the register is in
|
||||
// the wrong set it will not be reused.
|
||||
allocated = TryAllocateRegister(instr->dest, preferred_reg);
|
||||
} else {
|
||||
interval->free_until_set = free_until_sets_.vec_set;
|
||||
// Allocate a register. This will either reserve a free one or
|
||||
// spill and reuse an active one.
|
||||
allocated = TryAllocateRegister(instr->dest);
|
||||
}
|
||||
if (!allocated) {
|
||||
// Failed to allocate register -- need to spill and try again.
|
||||
// We spill only those registers we aren't using.
|
||||
if (!SpillOneRegister(builder, instr->dest->type)) {
|
||||
// Unable to spill anything - this shouldn't happen.
|
||||
XELOGE("Unable to spill any registers");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Demand allocation.
|
||||
if (!TryAllocateRegister(instr->dest)) {
|
||||
// Boned.
|
||||
XELOGE("Register allocation failed");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -198,228 +155,266 @@ int RegisterAllocationPass::Run(HIRBuilder* builder) {
|
|||
block = block->next;
|
||||
}
|
||||
|
||||
// Now have a sorted list of intervals, minus their ending ordinals.
|
||||
Intervals intervals;
|
||||
intervals.unhandled = head_interval;
|
||||
intervals.active = intervals.handled = NULL;
|
||||
while (intervals.unhandled) {
|
||||
// Get next unhandled interval.
|
||||
auto current = intervals.unhandled;
|
||||
intervals.unhandled = intervals.unhandled->next;
|
||||
current->RemoveFromList(&intervals.unhandled);
|
||||
|
||||
// Check for intervals in active that are handled or inactive.
|
||||
auto it = intervals.active;
|
||||
while (it) {
|
||||
auto next = it->next;
|
||||
if (it->end_ordinal <= current->start_ordinal) {
|
||||
// Move from active to handled.
|
||||
it->RemoveFromList(&intervals.active);
|
||||
it->AddToList(&intervals.handled);
|
||||
}
|
||||
it = next;
|
||||
}
|
||||
|
||||
// Find a register for current.
|
||||
if (!TryAllocateFreeReg(current, intervals)) {
|
||||
// Failed, spill.
|
||||
AllocateBlockedReg(builder, current, intervals);
|
||||
}
|
||||
|
||||
if (current->value->reg.index != -1) {
|
||||
// Add current to active.
|
||||
current->AddToList(&intervals.active);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RegisterAllocationPass::ComputeLastUse(Value* value) {
|
||||
// TODO(benvanik): compute during construction?
|
||||
// Note that this list isn't sorted (unfortunately), so we have to scan
|
||||
// them all.
|
||||
uint32_t max_ordinal = 0;
|
||||
Value::Use* last_use = NULL;
|
||||
auto use = value->use_head;
|
||||
while (use) {
|
||||
if (!last_use || use->instr->ordinal >= max_ordinal) {
|
||||
last_use = use;
|
||||
max_ordinal = use->instr->ordinal;
|
||||
}
|
||||
use = use->next;
|
||||
}
|
||||
value->last_use = last_use ? last_use->instr : NULL;
|
||||
}
|
||||
|
||||
bool RegisterAllocationPass::TryAllocateFreeReg(
|
||||
Interval* current, Intervals& intervals) {
|
||||
// Reset all registers in the set to unused.
|
||||
auto free_until_set = current->free_until_set;
|
||||
for (uint32_t n = 0; n < free_until_set->count; n++) {
|
||||
free_until_set->pos[n] = -1;
|
||||
}
|
||||
|
||||
// Mark all active registers as used.
|
||||
// TODO(benvanik): keep some kind of bitvector so that this is instant?
|
||||
auto it = intervals.active;
|
||||
while (it) {
|
||||
if (it->free_until_set == free_until_set) {
|
||||
free_until_set->pos[it->value->reg.index] = 0;
|
||||
}
|
||||
it = it->next;
|
||||
}
|
||||
|
||||
uint32_t max_pos = 0;
|
||||
for (uint32_t n = 0; n < free_until_set->count; n++) {
|
||||
if (max_pos == -1) {
|
||||
max_pos = n;
|
||||
} else {
|
||||
if (free_until_set->pos[n] > free_until_set->pos[max_pos]) {
|
||||
max_pos = n;
|
||||
void RegisterAllocationPass::DumpUsage(const char* name) {
|
||||
#if 0
|
||||
fprintf(stdout, "\n%s:\n", name);
|
||||
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
|
||||
auto usage_set = usage_sets_.all_sets[i];
|
||||
if (usage_set) {
|
||||
fprintf(stdout, "set %s:\n", usage_set->set->name);
|
||||
fprintf(stdout, " avail: %s\n", usage_set->availability.to_string().c_str());
|
||||
fprintf(stdout, " upcoming uses:\n");
|
||||
for (auto it = usage_set->upcoming_uses.begin();
|
||||
it != usage_set->upcoming_uses.end(); ++it) {
|
||||
fprintf(stdout, " v%d, used at %d\n",
|
||||
it->value->ordinal,
|
||||
it->use->instr->ordinal);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!free_until_set->pos[max_pos]) {
|
||||
// No register available without spilling.
|
||||
return false;
|
||||
}
|
||||
if (current->end_ordinal < free_until_set->pos[max_pos]) {
|
||||
// Register available for the whole interval.
|
||||
current->value->reg.set = free_until_set->set;
|
||||
current->value->reg.index = max_pos;
|
||||
} else {
|
||||
// Register available for the first part of the interval.
|
||||
// Split the interval at where it hits the next one.
|
||||
//current->value->reg = max_pos;
|
||||
//SplitRange(current, free_until_set->pos[max_pos]);
|
||||
// TODO(benvanik): actually split -- for now we just spill.
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
void RegisterAllocationPass::AllocateBlockedReg(
|
||||
HIRBuilder* builder, Interval* current, Intervals& intervals) {
|
||||
auto free_until_set = current->free_until_set;
|
||||
|
||||
// TODO(benvanik): smart heuristics.
|
||||
// wimmer AllocateBlockedReg has some stuff for deciding whether to
|
||||
// spill current or some other active interval - which we ignore.
|
||||
|
||||
// Pick a random interval. Maybe the first. Sure.
|
||||
auto spill_interval = intervals.active;
|
||||
Value* spill_value = NULL;
|
||||
Instr* prev_use = NULL;
|
||||
Instr* next_use = NULL;
|
||||
while (spill_interval) {
|
||||
if (spill_interval->free_until_set != free_until_set ||
|
||||
spill_interval->start_ordinal == current->start_ordinal) {
|
||||
// Only interested in ones of the same register set.
|
||||
// We also ensure that ones at the same ordinal as us are ignored,
|
||||
// which can happen with multiple local inserts/etc.
|
||||
spill_interval = spill_interval->next;
|
||||
continue;
|
||||
void RegisterAllocationPass::PrepareBlockState() {
|
||||
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
|
||||
auto usage_set = usage_sets_.all_sets[i];
|
||||
if (usage_set) {
|
||||
usage_set->availability.set();
|
||||
usage_set->upcoming_uses.clear();
|
||||
}
|
||||
spill_value = spill_interval->value;
|
||||
}
|
||||
DumpUsage("PrepareBlockState");
|
||||
}
|
||||
|
||||
// Find the uses right before/after current.
|
||||
auto use = spill_value->use_head;
|
||||
while (use) {
|
||||
if (use->instr->ordinal != -1) {
|
||||
if (use->instr->ordinal < current->start_ordinal) {
|
||||
if (!prev_use || prev_use->ordinal < use->instr->ordinal) {
|
||||
prev_use = use->instr;
|
||||
}
|
||||
} else if (use->instr->ordinal > current->start_ordinal) {
|
||||
if (!next_use || next_use->ordinal > use->instr->ordinal) {
|
||||
next_use = use->instr;
|
||||
}
|
||||
void RegisterAllocationPass::AdvanceUses(Instr* instr) {
|
||||
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
|
||||
auto usage_set = usage_sets_.all_sets[i];
|
||||
if (!usage_set) {
|
||||
break;
|
||||
}
|
||||
auto& upcoming_uses = usage_set->upcoming_uses;
|
||||
for (auto it = upcoming_uses.begin(); it != upcoming_uses.end();) {
|
||||
if (!it->use) {
|
||||
// No uses at all - we can remove right away.
|
||||
// This comes up from instructions where the dest is never used,
|
||||
// like the ATOMIC ops.
|
||||
MarkRegAvailable(it->value->reg);
|
||||
it = upcoming_uses.erase(it);
|
||||
continue;
|
||||
}
|
||||
if (it->use->instr != instr) {
|
||||
// Not yet at this instruction.
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
// The use is from this instruction.
|
||||
if (!it->use->next) {
|
||||
// Last use of the value. We can retire it now.
|
||||
MarkRegAvailable(it->value->reg);
|
||||
it = upcoming_uses.erase(it);
|
||||
} else {
|
||||
// Used again. Push back the next use.
|
||||
// Note that we may be used multiple times this instruction, so
|
||||
// eat those.
|
||||
auto next_use = it->use->next;
|
||||
while (next_use->next && next_use->instr == instr) {
|
||||
next_use = next_use->next;
|
||||
}
|
||||
// Remove the iterator.
|
||||
auto value = it->value;
|
||||
it = upcoming_uses.erase(it);
|
||||
upcoming_uses.emplace_back(value, next_use);
|
||||
}
|
||||
use = use->next;
|
||||
}
|
||||
if (!prev_use) {
|
||||
prev_use = spill_value->def;
|
||||
}
|
||||
if (prev_use->next == next_use) {
|
||||
// Uh, this interval is way too short.
|
||||
spill_interval = spill_interval->next;
|
||||
continue;
|
||||
}
|
||||
XEASSERT(prev_use->ordinal != -1);
|
||||
XEASSERTNOTNULL(next_use);
|
||||
break;
|
||||
}
|
||||
XEASSERT(spill_interval->free_until_set == free_until_set);
|
||||
DumpUsage("AdvanceUses");
|
||||
}
|
||||
|
||||
// Find the real last use -- paired ops may require sequences to stay
|
||||
// intact. This is a bad design.
|
||||
auto prev_def_tail = prev_use;
|
||||
while (prev_def_tail &&
|
||||
prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
|
||||
prev_def_tail = prev_def_tail->prev;
|
||||
bool RegisterAllocationPass::IsRegInUse(const RegAssignment& reg) {
|
||||
RegisterSetUsage* usage_set;
|
||||
if (reg.set == usage_sets_.int_set->set) {
|
||||
usage_set = usage_sets_.int_set;
|
||||
} else if (reg.set == usage_sets_.float_set->set) {
|
||||
usage_set = usage_sets_.float_set;
|
||||
} else {
|
||||
usage_set = usage_sets_.vec_set;
|
||||
}
|
||||
return !usage_set->availability.test(reg.index);
|
||||
}
|
||||
|
||||
RegisterAllocationPass::RegisterSetUsage*
|
||||
RegisterAllocationPass::MarkRegUsed(const RegAssignment& reg,
|
||||
Value* value, Value::Use* use) {
|
||||
auto usage_set = RegisterSetForValue(value);
|
||||
usage_set->availability.set(reg.index, false);
|
||||
usage_set->upcoming_uses.emplace_back(value, use);
|
||||
DumpUsage("MarkRegUsed");
|
||||
return usage_set;
|
||||
}
|
||||
|
||||
RegisterAllocationPass::RegisterSetUsage*
|
||||
RegisterAllocationPass::MarkRegAvailable(const hir::RegAssignment& reg) {
|
||||
RegisterSetUsage* usage_set;
|
||||
if (reg.set == usage_sets_.int_set->set) {
|
||||
usage_set = usage_sets_.int_set;
|
||||
} else if (reg.set == usage_sets_.float_set->set) {
|
||||
usage_set = usage_sets_.float_set;
|
||||
} else {
|
||||
usage_set = usage_sets_.vec_set;
|
||||
}
|
||||
usage_set->availability.set(reg.index, true);
|
||||
return usage_set;
|
||||
}
|
||||
|
||||
bool RegisterAllocationPass::TryAllocateRegister(
|
||||
Value* value, const RegAssignment& preferred_reg) {
|
||||
// If the preferred register matches type and is available, use it.
|
||||
auto usage_set = RegisterSetForValue(value);
|
||||
if (usage_set->set == preferred_reg.set) {
|
||||
// Check if available.
|
||||
if (!IsRegInUse(preferred_reg)) {
|
||||
// Mark as in-use and return. Best case.
|
||||
MarkRegUsed(preferred_reg, value, value->use_head);
|
||||
value->reg = preferred_reg;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
Value* new_value;
|
||||
uint32_t end_ordinal;
|
||||
// Otherwise, fallback to allocating like normal.
|
||||
return TryAllocateRegister(value);
|
||||
}
|
||||
|
||||
bool RegisterAllocationPass::TryAllocateRegister(Value* value) {
|
||||
// Get the set this register is in.
|
||||
RegisterSetUsage* usage_set = RegisterSetForValue(value);
|
||||
|
||||
// Find the first free register, if any.
|
||||
// We have to ensure it's a valid one (in our count).
|
||||
unsigned long first_unused = 0;
|
||||
bool all_used = _BitScanForward(&first_unused, usage_set->availability.to_ulong()) == 0;
|
||||
if (!all_used && first_unused < usage_set->count) {
|
||||
// Available! Use it!.
|
||||
value->reg.set = usage_set->set;
|
||||
value->reg.index = first_unused;
|
||||
MarkRegUsed(value->reg, value, value->use_head);
|
||||
return true;
|
||||
}
|
||||
|
||||
// None available! Spill required.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool RegisterAllocationPass::SpillOneRegister(
|
||||
HIRBuilder* builder, TypeName required_type) {
|
||||
// Get the set that we will be picking from.
|
||||
RegisterSetUsage* usage_set;
|
||||
if (required_type <= INT64_TYPE) {
|
||||
usage_set = usage_sets_.int_set;
|
||||
} else if (required_type <= FLOAT64_TYPE) {
|
||||
usage_set = usage_sets_.float_set;
|
||||
} else {
|
||||
usage_set = usage_sets_.vec_set;
|
||||
}
|
||||
|
||||
DumpUsage("SpillOneRegister (pre)");
|
||||
// Pick the one with the furthest next use.
|
||||
XEASSERT(!usage_set->upcoming_uses.empty());
|
||||
auto furthest_usage = std::max_element(
|
||||
usage_set->upcoming_uses.begin(), usage_set->upcoming_uses.end(),
|
||||
RegisterUsage::Comparer());
|
||||
Value* spill_value = furthest_usage->value;
|
||||
Value::Use* prev_use = furthest_usage->use->prev;
|
||||
Value::Use* next_use = furthest_usage->use;
|
||||
XEASSERTNOTNULL(next_use);
|
||||
usage_set->upcoming_uses.erase(furthest_usage);
|
||||
DumpUsage("SpillOneRegister (post)");
|
||||
const auto reg = spill_value->reg;
|
||||
|
||||
// We know the spill_value use list is sorted, so we can cut it right now.
|
||||
// This makes it easier down below.
|
||||
auto new_head_use = next_use;
|
||||
|
||||
// Allocate local.
|
||||
if (spill_value->local_slot) {
|
||||
// Value is already assigned a slot, so load from that.
|
||||
// We can then split the interval right after the previous use to
|
||||
// before the next use.
|
||||
|
||||
// Update the last use of the spilled interval/value.
|
||||
end_ordinal = spill_interval->end_ordinal;
|
||||
spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal;
|
||||
XEASSERT(end_ordinal != -1);
|
||||
XEASSERT(spill_interval->end_ordinal != -1);
|
||||
|
||||
// Insert a load right before the next use.
|
||||
new_value = builder->LoadLocal(spill_value->local_slot);
|
||||
builder->last_instr()->MoveBefore(next_use);
|
||||
|
||||
// Update last use info.
|
||||
new_value->last_use = spill_value->last_use;
|
||||
spill_value->last_use = prev_use;
|
||||
// Value is already assigned a slot. Since we allocate in order and this is
|
||||
// all SSA we know the stored value will be exactly what we want. Yay,
|
||||
// we can prevent the redundant store!
|
||||
// In fact, we may even want to pin this spilled value so that we always
|
||||
// use the spilled value and prevent the need for more locals.
|
||||
} else {
|
||||
// Allocate a local slot.
|
||||
spill_value->local_slot = builder->AllocLocal(spill_value->type);
|
||||
|
||||
// Insert a spill right after the def.
|
||||
// Add store.
|
||||
builder->StoreLocal(spill_value->local_slot, spill_value);
|
||||
auto spill_store = builder->last_instr();
|
||||
spill_store->MoveBefore(prev_def_tail->next);
|
||||
auto spill_store_use = spill_store->src2_use;
|
||||
XEASSERTNULL(spill_store_use->prev);
|
||||
if (prev_use && prev_use->instr->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
|
||||
// Instruction is paired. This is bad. We will insert the spill after the
|
||||
// paired instruction.
|
||||
XEASSERTNOTNULL(prev_use->instr->next);
|
||||
spill_store->MoveBefore(prev_use->instr->next);
|
||||
|
||||
// Update last use of spilled interval/value.
|
||||
end_ordinal = spill_interval->end_ordinal;
|
||||
spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal;
|
||||
XEASSERT(end_ordinal != -1);
|
||||
XEASSERT(spill_interval->end_ordinal != -1);
|
||||
// Update last use.
|
||||
spill_value->last_use = spill_store;
|
||||
} else if (prev_use) {
|
||||
// We insert the store immediately before the previous use.
|
||||
// If we were smarter we could then re-run allocation and reuse the register
|
||||
// once dropped.
|
||||
spill_store->MoveBefore(prev_use->instr);
|
||||
|
||||
// Insert a load right before the next use.
|
||||
new_value = builder->LoadLocal(spill_value->local_slot);
|
||||
builder->last_instr()->MoveBefore(next_use);
|
||||
// Update last use.
|
||||
spill_value->last_use = prev_use->instr;
|
||||
} else {
|
||||
// This is the first use, so the only thing we have is the define.
|
||||
// Move the store to right after that.
|
||||
spill_store->MoveBefore(spill_value->def->next);
|
||||
|
||||
// Update last use info.
|
||||
new_value->last_use = spill_value->last_use;
|
||||
spill_value->last_use = spill_store;
|
||||
// Update last use.
|
||||
spill_value->last_use = spill_store;
|
||||
}
|
||||
}
|
||||
|
||||
// Reuse the same local slot. Hooray SSA.
|
||||
#if ASSERT_NO_CYCLES
|
||||
builder->AssertNoCycles();
|
||||
spill_value->def->block->AssertNoCycles();
|
||||
#endif // ASSERT_NO_CYCLES
|
||||
|
||||
// Add load.
|
||||
// Inserted immediately before the next use. Since by definition the next
|
||||
// use is after the instruction requesting the spill we know we haven't
|
||||
// done allocation for that code yet and can let that be handled
|
||||
// automatically when we get to it.
|
||||
auto new_value = builder->LoadLocal(spill_value->local_slot);
|
||||
auto spill_load = builder->last_instr();
|
||||
spill_load->MoveBefore(next_use->instr);
|
||||
// Note: implicit first use added.
|
||||
|
||||
#if ASSERT_NO_CYCLES
|
||||
builder->AssertNoCycles();
|
||||
spill_value->def->block->AssertNoCycles();
|
||||
#endif // ASSERT_NO_CYCLES
|
||||
|
||||
// Set the local slot of the new value to our existing one. This way we will
|
||||
// reuse that same memory if needed.
|
||||
new_value->local_slot = spill_value->local_slot;
|
||||
|
||||
// Rename all future uses to that loaded value.
|
||||
auto use = spill_value->use_head;
|
||||
while (use) {
|
||||
// TODO(benvanik): keep use list sorted so we don't have to do this.
|
||||
if (use->instr->ordinal <= spill_interval->end_ordinal ||
|
||||
use->instr->ordinal == -1) {
|
||||
use = use->next;
|
||||
continue;
|
||||
}
|
||||
auto next = use->next;
|
||||
auto instr = use->instr;
|
||||
// Rename all future uses of the SSA value to the new value as loaded
|
||||
// from the local.
|
||||
// We can quickly do this by walking the use list. Because the list is
|
||||
// already sorted we know we are going to end up with a sorted list.
|
||||
auto walk_use = new_head_use;
|
||||
auto new_use_tail = walk_use;
|
||||
while (walk_use) {
|
||||
auto next_walk_use = walk_use->next;
|
||||
auto instr = walk_use->instr;
|
||||
|
||||
uint32_t signature = instr->opcode->signature;
|
||||
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
|
||||
if (instr->src1.value == spill_value) {
|
||||
|
@ -436,36 +431,107 @@ void RegisterAllocationPass::AllocateBlockedReg(
|
|||
instr->set_src3(new_value);
|
||||
}
|
||||
}
|
||||
use = next;
|
||||
|
||||
walk_use = next_walk_use;
|
||||
if (walk_use) {
|
||||
new_use_tail = walk_use;
|
||||
}
|
||||
}
|
||||
new_value->last_use = new_use_tail->instr;
|
||||
|
||||
// Create new interval.
|
||||
auto arena = scratch_arena();
|
||||
auto new_interval = arena->Alloc<Interval>();
|
||||
new_interval->start_ordinal = new_value->def->ordinal;
|
||||
new_interval->end_ordinal = end_ordinal;
|
||||
new_interval->value = new_value;
|
||||
new_interval->next = NULL;
|
||||
new_interval->prev = NULL;
|
||||
if (new_value->type <= INT64_TYPE) {
|
||||
new_interval->free_until_set = free_until_sets_.int_set;
|
||||
} else if (new_value->type <= FLOAT64_TYPE) {
|
||||
new_interval->free_until_set = free_until_sets_.float_set;
|
||||
} else {
|
||||
new_interval->free_until_set = free_until_sets_.vec_set;
|
||||
}
|
||||
// Update tracking.
|
||||
MarkRegAvailable(reg);
|
||||
|
||||
// Remove the old interval from the active list, as it's been spilled.
|
||||
spill_interval->RemoveFromList(&intervals.active);
|
||||
spill_interval->AddToList(&intervals.handled);
|
||||
|
||||
// Insert interval into the right place in the list.
|
||||
// We know it's ahead of us.
|
||||
new_interval->InsertIntoList(&intervals.unhandled);
|
||||
|
||||
// TODO(benvanik): use the register we just freed?
|
||||
//current->value->reg.set = free_until_set->set;
|
||||
//current->value->reg.index = spill_interval->value->reg.index;
|
||||
bool allocated = TryAllocateFreeReg(current, intervals);
|
||||
XEASSERTTRUE(allocated);
|
||||
return true;
|
||||
}
|
||||
|
||||
RegisterAllocationPass::RegisterSetUsage*
|
||||
RegisterAllocationPass::RegisterSetForValue(
|
||||
const Value* value) {
|
||||
if (value->type <= INT64_TYPE) {
|
||||
return usage_sets_.int_set;
|
||||
} else if (value->type <= FLOAT64_TYPE) {
|
||||
return usage_sets_.float_set;
|
||||
} else {
|
||||
return usage_sets_.vec_set;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
int CompareValueUse(const Value::Use* a, const Value::Use* b) {
|
||||
return a->instr->ordinal - b->instr->ordinal;
|
||||
}
|
||||
} // namespace
|
||||
void RegisterAllocationPass::SortUsageList(Value* value) {
|
||||
// Modified in-place linked list sort from:
|
||||
// http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
|
||||
if (!value->use_head) {
|
||||
return;
|
||||
}
|
||||
Value::Use* head = value->use_head;
|
||||
Value::Use* tail = nullptr;
|
||||
int insize = 1;
|
||||
while (true) {
|
||||
auto p = head;
|
||||
head = nullptr;
|
||||
tail = nullptr;
|
||||
// count number of merges we do in this pass
|
||||
int nmerges = 0;
|
||||
while (p) {
|
||||
// there exists a merge to be done
|
||||
nmerges++;
|
||||
// step 'insize' places along from p
|
||||
auto q = p;
|
||||
int psize = 0;
|
||||
for (int i = 0; i < insize; i++) {
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q) break;
|
||||
}
|
||||
// if q hasn't fallen off end, we have two lists to merge
|
||||
int qsize = insize;
|
||||
// now we have two lists; merge them
|
||||
while (psize > 0 || (qsize > 0 && q)) {
|
||||
// decide whether next element of merge comes from p or q
|
||||
Value::Use* e = nullptr;
|
||||
if (psize == 0) {
|
||||
// p is empty; e must come from q
|
||||
e = q; q = q->next; qsize--;
|
||||
} else if (qsize == 0 || !q) {
|
||||
// q is empty; e must come from p
|
||||
e = p; p = p->next; psize--;
|
||||
} else if (CompareValueUse(p, q) <= 0) {
|
||||
// First element of p is lower (or same); e must come from p
|
||||
e = p; p = p->next; psize--;
|
||||
} else {
|
||||
// First element of q is lower; e must come from q
|
||||
e = q; q = q->next; qsize--;
|
||||
}
|
||||
// add the next element to the merged list
|
||||
if (tail) {
|
||||
tail->next = e;
|
||||
} else {
|
||||
head = e;
|
||||
}
|
||||
// Maintain reverse pointers in a doubly linked list.
|
||||
e->prev = tail;
|
||||
tail = e;
|
||||
}
|
||||
// now p has stepped 'insize' places along, and q has too
|
||||
p = q;
|
||||
}
|
||||
if (tail) {
|
||||
tail->next = nullptr;
|
||||
}
|
||||
// If we have done only one merge, we're finished
|
||||
if (nmerges <= 1) {
|
||||
// allow for nmerges==0, the empty list case
|
||||
break;
|
||||
}
|
||||
// Otherwise repeat, merging lists twice the size
|
||||
insize *= 2;
|
||||
}
|
||||
|
||||
value->use_head = head;
|
||||
value->last_use = tail->instr;
|
||||
}
|
||||
|
|
|
@ -10,6 +10,10 @@
|
|||
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <vector>
|
||||
|
||||
#include <alloy/backend/machine_info.h>
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
@ -27,28 +31,53 @@ public:
|
|||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
struct Interval;
|
||||
struct Intervals;
|
||||
void ComputeLastUse(hir::Value* value);
|
||||
bool TryAllocateFreeReg(Interval* current, Intervals& intervals);
|
||||
void AllocateBlockedReg(hir::HIRBuilder* builder,
|
||||
Interval* current, Intervals& intervals);
|
||||
// TODO(benvanik): rewrite all this set shit -- too much indirection, the
|
||||
// complexity is not needed.
|
||||
struct RegisterUsage {
|
||||
hir::Value* value;
|
||||
hir::Value::Use* use;
|
||||
RegisterUsage() : value(nullptr), use(nullptr) {}
|
||||
RegisterUsage(hir::Value* value_, hir::Value::Use* use_)
|
||||
: value(value_), use(use_) {}
|
||||
struct Comparer : std::binary_function<RegisterUsage, RegisterUsage, bool> {
|
||||
bool operator()(const RegisterUsage& a, const RegisterUsage& b) const {
|
||||
return a.use->instr->ordinal < b.use->instr->ordinal;
|
||||
}
|
||||
};
|
||||
};
|
||||
struct RegisterSetUsage {
|
||||
const backend::MachineInfo::RegisterSet* set = nullptr;
|
||||
uint32_t count = 0;
|
||||
std::bitset<32> availability = 0;
|
||||
// TODO(benvanik): another data type.
|
||||
std::vector<RegisterUsage> upcoming_uses;
|
||||
};
|
||||
|
||||
void DumpUsage(const char* name);
|
||||
void PrepareBlockState();
|
||||
void AdvanceUses(hir::Instr* instr);
|
||||
bool IsRegInUse(const hir::RegAssignment& reg);
|
||||
RegisterSetUsage* MarkRegUsed(const hir::RegAssignment& reg,
|
||||
hir::Value* value, hir::Value::Use* use);
|
||||
RegisterSetUsage* MarkRegAvailable(const hir::RegAssignment& reg);
|
||||
|
||||
bool TryAllocateRegister(hir::Value* value,
|
||||
const hir::RegAssignment& preferred_reg);
|
||||
bool TryAllocateRegister(hir::Value* value);
|
||||
bool SpillOneRegister(hir::HIRBuilder* builder, hir::TypeName required_type);
|
||||
|
||||
RegisterSetUsage* RegisterSetForValue(const hir::Value* value);
|
||||
|
||||
void SortUsageList(hir::Value* value);
|
||||
|
||||
private:
|
||||
const backend::MachineInfo* machine_info_;
|
||||
|
||||
struct RegisterFreeUntilSet {
|
||||
uint32_t count;
|
||||
uint32_t pos[32];
|
||||
const backend::MachineInfo::RegisterSet* set;
|
||||
};
|
||||
struct RegisterFreeUntilSets {
|
||||
RegisterFreeUntilSet* int_set;
|
||||
RegisterFreeUntilSet* float_set;
|
||||
RegisterFreeUntilSet* vec_set;
|
||||
RegisterFreeUntilSet* all_sets[3];
|
||||
};
|
||||
RegisterFreeUntilSets free_until_sets_;
|
||||
struct {
|
||||
RegisterSetUsage* int_set = nullptr;
|
||||
RegisterSetUsage* float_set = nullptr;
|
||||
RegisterSetUsage* vec_set = nullptr;
|
||||
RegisterSetUsage* all_sets[3];
|
||||
} usage_sets_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -88,12 +88,12 @@ int ValidationPass::ValidateInstruction(Block* block, Instr* instr) {
|
|||
}
|
||||
|
||||
int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) {
|
||||
if (value->def) {
|
||||
/*auto def = value->def;
|
||||
XEASSERT(def->block == block);
|
||||
if (def->block != block) {
|
||||
return 1;
|
||||
}*/
|
||||
}
|
||||
//if (value->def) {
|
||||
// auto def = value->def;
|
||||
// XEASSERT(def->block == block);
|
||||
// if (def->block != block) {
|
||||
// return 1;
|
||||
// }
|
||||
//}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -44,6 +44,10 @@ typedef struct XECACHEALIGN vec128_s {
|
|||
uint64_t high;
|
||||
};
|
||||
};
|
||||
|
||||
bool operator== (const vec128_s& b) const {
|
||||
return low == b.low && high == b.high;
|
||||
}
|
||||
} vec128_t;
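(Aside: a tiny usage sketch of the equality operator added above; it assumes only the vec128i() constructor declared just below and compares the raw low/high 64-bit halves.)

vec128_t a = vec128i(1, 2, 3, 4);
vec128_t b = vec128i(1, 2, 3, 4);
bool same = (a == b);  // true: both 64-bit halves match bit-for-bit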
|
||||
XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
|
||||
vec128_t v;
|
||||
|
|
|
@ -643,20 +643,20 @@ XEEMITTER(cmpli, 0x28000000, D )(PPCHIRBuilder& f, InstrData& i) {
|
|||
XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) {
|
||||
// RA <- (RS) & (RB)
|
||||
Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) {
|
||||
// RA <- (RS) & ¬(RB)
|
||||
Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB)));
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -665,8 +665,8 @@ XEEMITTER(andix, 0x70000000, D )(PPCHIRBuilder& f, InstrData& i) {
|
|||
Value* ra = f.And(
|
||||
f.LoadGPR(i.D.RT),
|
||||
f.LoadConstant((uint64_t)i.D.DS));
|
||||
f.UpdateCR(0, ra);
|
||||
f.StoreGPR(i.D.RA, ra);
|
||||
f.UpdateCR(0, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -675,8 +675,8 @@ XEEMITTER(andisx, 0x74000000, D )(PPCHIRBuilder& f, InstrData& i) {
|
|||
Value* ra = f.And(
|
||||
f.LoadGPR(i.D.RT),
|
||||
f.LoadConstant((uint64_t(i.D.DS) << 16)));
|
||||
f.UpdateCR(0, ra);
|
||||
f.StoreGPR(i.D.RA, ra);
|
||||
f.UpdateCR(0, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -688,10 +688,10 @@ XEEMITTER(cntlzdx, 0x7C000074, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// RA <- n
|
||||
Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT));
|
||||
v = f.ZeroExtend(v, INT64_TYPE);
|
||||
f.StoreGPR(i.X.RA, v);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -704,10 +704,10 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
Value* v = f.CountLeadingZeros(
|
||||
f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
|
||||
v = f.ZeroExtend(v, INT64_TYPE);
|
||||
f.StoreGPR(i.X.RA, v);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -715,10 +715,10 @@ XEEMITTER(eqvx, 0x7C000238, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// RA <- (RS) == (RB)
|
||||
Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
|
||||
ra = f.Not(ra);
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -728,10 +728,10 @@ XEEMITTER(extsbx, 0x7C000774, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// RA[0:55] <- i56.s
|
||||
Value* rt = f.LoadGPR(i.X.RT);
|
||||
rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE);
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, rt);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -741,10 +741,10 @@ XEEMITTER(extshx, 0x7C000734, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// RA[0:47] <- 48.s
|
||||
Value* rt = f.LoadGPR(i.X.RT);
|
||||
rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE);
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, rt);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -754,10 +754,10 @@ XEEMITTER(extswx, 0x7C0007B4, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// RA[0:31] <- i32.s
|
||||
Value* rt = f.LoadGPR(i.X.RT);
|
||||
rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE);
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, rt);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, rt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -767,10 +767,10 @@ XEEMITTER(nandx, 0x7C0003B8, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
f.LoadGPR(i.X.RT),
|
||||
f.LoadGPR(i.X.RB));
|
||||
ra = f.Not(ra);
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -780,10 +780,10 @@ XEEMITTER(norx, 0x7C0000F8, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
f.LoadGPR(i.X.RT),
|
||||
f.LoadGPR(i.X.RB));
|
||||
ra = f.Not(ra);
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -803,10 +803,10 @@ XEEMITTER(orx, 0x7C000378, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
f.LoadGPR(i.X.RT),
|
||||
f.LoadGPR(i.X.RB));
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -815,10 +815,10 @@ XEEMITTER(orcx, 0x7C000338, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
Value* ra = f.Or(
|
||||
f.LoadGPR(i.X.RT),
|
||||
f.Not(f.LoadGPR(i.X.RB)));
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -849,10 +849,10 @@ XEEMITTER(xorx, 0x7C000278, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
Value* ra = f.Xor(
|
||||
f.LoadGPR(i.X.RT),
|
||||
f.LoadGPR(i.X.RB));
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
if (i.X.Rc) {
|
||||
f.UpdateCR(0, ra);
|
||||
}
|
||||
f.StoreGPR(i.X.RA, ra);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -895,10 +895,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
|
|||
if (m != 0xFFFFFFFFFFFFFFFF) {
|
||||
v = f.And(v, f.LoadConstant(m));
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
if (i.MD.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
return 0;
|
||||
} else if (i.MD.idx == 1) {
|
||||
// XEEMITTER(rldicrx, 0x78000004, MD )
|
||||
|
@ -922,10 +922,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
|
|||
v = f.And(v, f.LoadConstant(m));
|
||||
}
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
if (i.MD.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
return 0;
|
||||
} else if (i.MD.idx == 2) {
|
||||
// XEEMITTER(rldicx, 0x78000008, MD )
|
||||
|
@ -959,10 +959,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
|
|||
f.And(v, f.LoadConstant(m)),
|
||||
f.And(ra, f.LoadConstant(~m)));
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
if (i.MD.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.MD.RA, v);
|
||||
return 0;
|
||||
} else {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
|
@ -987,10 +987,10 @@ XEEMITTER(rlwimix, 0x50000000, M )(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
v = f.ZeroExtend(v, INT64_TYPE);
|
||||
v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m))));
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
if (i.M.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1014,10 +1014,10 @@ XEEMITTER(rlwinmx, 0x54000000, M )(PPCHIRBuilder& f, InstrData& i) {
|
|||
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
|
||||
}
|
||||
v = f.ZeroExtend(v, INT64_TYPE);
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
if (i.M.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1036,10 +1036,10 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
|
|||
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
|
||||
}
|
||||
v = f.ZeroExtend(v, INT64_TYPE);
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
if (i.M.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.M.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1146,7 +1146,7 @@ XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
|
||||
// and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
|
||||
// We already have ca set to indicate the pos 63 bit, now just and in sign.
|
||||
ca = f.And(ca, f.Shr(v, 63));
|
||||
ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));
|
||||
|
||||
f.StoreCA(ca);
|
||||
f.StoreGPR(i.X.RA, v);
|
||||
|
@ -1174,15 +1174,15 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
|
|||
XEASSERT(sh);
|
||||
uint64_t mask = XEMASK(64 - sh, 63);
|
||||
Value* ca = f.And(
|
||||
f.Shr(v, 63),
|
||||
f.Truncate(f.Shr(v, 63), INT8_TYPE),
|
||||
f.IsTrue(f.And(v, f.LoadConstant(mask))));
|
||||
f.StoreCA(ca);
|
||||
|
||||
v = f.Sha(v, sh);
|
||||
f.StoreGPR(i.XS.RA, v);
|
||||
if (i.XS.Rc) {
|
||||
f.UpdateCR(0, v);
|
||||
}
|
||||
f.StoreGPR(i.XS.RA, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1203,7 +1203,7 @@ XEEMITTER(srawx, 0x7C000630, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// is negative.
|
||||
Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
|
||||
Value* ca = f.And(
|
||||
f.Shr(v, 31),
|
||||
f.Truncate(f.Shr(v, 31), INT8_TYPE),
|
||||
f.IsTrue(f.And(v, mask)));
|
||||
f.StoreCA(ca);
|
||||
v = f.Sha(v, sh),
|
||||
|
@ -1235,8 +1235,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCHIRBuilder& f, InstrData& i) {
|
|||
// is negative.
|
||||
uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63);
|
||||
ca = f.And(
|
||||
f.Shr(v, 31),
|
||||
f.ZeroExtend(f.IsTrue(f.And(v, f.LoadConstant(mask))), INT32_TYPE));
|
||||
f.Truncate(f.Shr(v, 31), INT8_TYPE),
|
||||
f.IsTrue(f.And(v, f.LoadConstant(mask))));
|
||||
|
||||
v = f.Sha(v, (int8_t)i.X.RB),
|
||||
v = f.SignExtend(v, INT64_TYPE);
|
||||
|
|
|
@ -240,18 +240,18 @@ void PPCHIRBuilder::UpdateCR(
|
|||
|
||||
void PPCHIRBuilder::UpdateCR(
|
||||
uint32_t n, Value* lhs, Value* rhs, bool is_signed) {
|
||||
Value* lt;
|
||||
Value* gt;
|
||||
if (is_signed) {
|
||||
lt = CompareSLT(lhs, rhs);
|
||||
gt = CompareSGT(lhs, rhs);
|
||||
Value* lt = CompareSLT(lhs, rhs);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
|
||||
Value* gt = CompareSGT(lhs, rhs);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
|
||||
} else {
|
||||
lt = CompareULT(lhs, rhs);
|
||||
gt = CompareUGT(lhs, rhs);
|
||||
Value* lt = CompareULT(lhs, rhs);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
|
||||
Value* gt = CompareUGT(lhs, rhs);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
|
||||
}
|
||||
Value* eq = CompareEQ(lhs, rhs);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
|
||||
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq);
|
||||
|
||||
// Value* so = AllocValue(UINT8_TYPE);
|
||||
|
@ -280,7 +280,7 @@ Value* PPCHIRBuilder::LoadCA() {
|
|||
}
|
||||
|
||||
void PPCHIRBuilder::StoreCA(Value* value) {
|
||||
value = Truncate(value, INT8_TYPE);
|
||||
XEASSERT(value->type == INT8_TYPE);
|
||||
StoreContext(offsetof(PPCContext, xer_ca), value);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include <alloy/hir/block.h>

#include <alloy/hir/instr.h>

using namespace alloy;
using namespace alloy::hir;

void Block::AssertNoCycles() {
Instr* hare = instr_head;
Instr* tortoise = instr_head;
if (!hare) {
return;
}
while (hare = hare->next) {
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
hare = hare->next;
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
tortoise = tortoise->next;
if (!hare || !tortoise) {
return;
}
}
}
@ -61,6 +61,8 @@ public:
|
|||
Instr* instr_tail;
|
||||
|
||||
uint16_t ordinal;
|
||||
|
||||
void AssertNoCycles();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
|
|||
case INT8_TYPE: str->Append("%X", value->constant.i8); break;
|
||||
case INT16_TYPE: str->Append("%X", value->constant.i16); break;
|
||||
case INT32_TYPE: str->Append("%X", value->constant.i32); break;
|
||||
case INT64_TYPE: str->Append("%X", value->constant.i64); break;
|
||||
case INT64_TYPE: str->Append("%llX", value->constant.i64); break;
|
||||
case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break;
|
||||
case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break;
|
||||
case VEC128_TYPE: str->Append("(%F,%F,%F,%F)",
|
||||
|
@ -252,6 +252,29 @@ void HIRBuilder::Dump(StringBuffer* str) {
|
|||
}
|
||||
}
|
||||
|
||||
void HIRBuilder::AssertNoCycles() {
|
||||
Block* hare = block_head_;
|
||||
Block* tortoise = block_head_;
|
||||
if (!hare) {
|
||||
return;
|
||||
}
|
||||
while (hare = hare->next) {
|
||||
if (hare == tortoise) {
|
||||
// Cycle!
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
hare = hare->next;
|
||||
if (hare == tortoise) {
|
||||
// Cycle!
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
tortoise = tortoise->next;
|
||||
if (!hare || !tortoise) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Block* HIRBuilder::current_block() const {
|
||||
return current_block_;
|
||||
}
|
||||
|
@@ -1729,16 +1752,19 @@ Value* HIRBuilder::Extract(Value* value, Value* index,
                           TypeName target_type) {
  // TODO(benvanik): could do some of this as constants.

  Value* trunc_index = index->type != INT8_TYPE ?
      Truncate(index, INT8_TYPE) : index;

  Instr* i = AppendInstr(
      OPCODE_EXTRACT_info, 0,
      AllocValue(target_type));
  i->set_src1(value);
  i->set_src2(ZeroExtend(index, INT64_TYPE));
  i->set_src2(trunc_index);
  i->src3.value = NULL;
  return i->dest;
}

Value* HIRBuilder::Extract(Value* value, uint64_t index,
Value* HIRBuilder::Extract(Value* value, uint8_t index,
                           TypeName target_type) {
  return Extract(value, LoadConstant(index), target_type);
}

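Note: after this change the extract lane index travels through the HIR as an 8-bit value (truncated on the fly if needed) instead of being zero-extended to 64 bits. A hedged usage sketch of the new uint8_t overload, assuming a HIRBuilder f and a VEC128 value v are already in scope (both names are illustrative):

// Pull the third 32-bit lane (index 2) out of a 128-bit vector.
Value* lane = f.Extract(v, static_cast<uint8_t>(2), INT32_TYPE);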
@@ -35,6 +35,7 @@ public:
  virtual int Finalize();

  void Dump(StringBuffer* str);
  void AssertNoCycles();

  Arena* arena() const { return arena_; }

@@ -196,7 +197,7 @@ public:
  Value* Insert(Value* value, Value* index, Value* part);
  Value* Insert(Value* value, uint64_t index, Value* part);
  Value* Extract(Value* value, Value* index, TypeName target_type);
  Value* Extract(Value* value, uint64_t index, TypeName target_type);
  Value* Extract(Value* value, uint8_t index, TypeName target_type);
  // i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...)
  // i8/i16/i32 -> vec128
  Value* Splat(Value* value, TypeName target_type);

@@ -48,19 +48,6 @@ void Instr::set_src3(Value* value) {
  src3_use = value ? value->AddUse(block->arena, this) : NULL;
}

bool Instr::Match(SignatureType dest_req,
                  SignatureType src1_req,
                  SignatureType src2_req,
                  SignatureType src3_req) const {
#define TO_SIG_TYPE(v) \
    (v ? (v->IsConstant() ? SignatureType((v->type + 1) | SIG_TYPE_C) : SignatureType(v->type + 1)) : SIG_TYPE_X)
  return
      ((dest_req == SIG_TYPE_IGNORE) || (dest_req == TO_SIG_TYPE(dest))) &&
      ((src1_req == SIG_TYPE_IGNORE) || (src1_req == TO_SIG_TYPE(src1.value))) &&
      ((src2_req == SIG_TYPE_IGNORE) || (src2_req == TO_SIG_TYPE(src2.value))) &&
      ((src3_req == SIG_TYPE_IGNORE) || (src3_req == TO_SIG_TYPE(src3.value)));
}

void Instr::MoveBefore(Instr* other) {
  if (next == other) {
    return;

@@ -24,26 +24,6 @@ namespace hir {
class Block;
class Label;

enum SignatureType {
  SIG_TYPE_X = 0,
  SIG_TYPE_I8 = 1,
  SIG_TYPE_I16 = 2,
  SIG_TYPE_I32 = 3,
  SIG_TYPE_I64 = 4,
  SIG_TYPE_F32 = 5,
  SIG_TYPE_F64 = 6,
  SIG_TYPE_V128 = 7,
  SIG_TYPE_C = (1 << 3),
  SIG_TYPE_I8C = SIG_TYPE_C | SIG_TYPE_I8,
  SIG_TYPE_I16C = SIG_TYPE_C | SIG_TYPE_I16,
  SIG_TYPE_I32C = SIG_TYPE_C | SIG_TYPE_I32,
  SIG_TYPE_I64C = SIG_TYPE_C | SIG_TYPE_I64,
  SIG_TYPE_F32C = SIG_TYPE_C | SIG_TYPE_F32,
  SIG_TYPE_F64C = SIG_TYPE_C | SIG_TYPE_F64,
  SIG_TYPE_V128C = SIG_TYPE_C | SIG_TYPE_V128,
  SIG_TYPE_IGNORE = 0xFF,
};

class Instr {
public:
  Block* block;
@@ -74,11 +54,6 @@ public:
  void set_src2(Value* value);
  void set_src3(Value* value);

  bool Match(SignatureType dest = SIG_TYPE_X,
             SignatureType src1 = SIG_TYPE_X,
             SignatureType src2 = SIG_TYPE_X,
             SignatureType src3 = SIG_TYPE_X) const;

  void MoveBefore(Instr* other);
  void Replace(const OpcodeInfo* opcode, uint16_t flags);
  void Remove();

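Note: the Instr::Match/SignatureType machinery removed above is the old pattern-matching style this commit replaces. It encoded each operand as type + 1 (so SIG_TYPE_X = 0 could mean "no operand") and OR'd in SIG_TYPE_C when the operand was a constant. A hedged sketch of how a pass would have used it, with i standing for some instruction being inspected (illustrative, not code from this tree):

// Matches an instruction whose first source is a non-constant int32 and
// whose second source is an int32 constant; the destination is ignored
// and src3 must be absent (the SIG_TYPE_X default).
if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C)) {
  // ...fold or rewrite the instruction here...
}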
@ -11,590 +11,590 @@
|
|||
DEFINE_OPCODE(
|
||||
OPCODE_COMMENT,
|
||||
"comment",
|
||||
OPCODE_SIG_X,
|
||||
OPCODE_FLAG_IGNORE);
|
||||
OPCODE_SIG_X_O,
|
||||
OPCODE_FLAG_IGNORE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_NOP,
|
||||
"nop",
|
||||
OPCODE_SIG_X,
|
||||
OPCODE_FLAG_IGNORE);
|
||||
OPCODE_FLAG_IGNORE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SOURCE_OFFSET,
|
||||
"source_offset",
|
||||
OPCODE_SIG_X_O,
|
||||
OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE);
|
||||
OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DEBUG_BREAK,
|
||||
"debug_break",
|
||||
OPCODE_SIG_X,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DEBUG_BREAK_TRUE,
|
||||
"debug_break_true",
|
||||
OPCODE_SIG_X_V,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_TRAP,
|
||||
"trap",
|
||||
OPCODE_SIG_X,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_TRAP_TRUE,
|
||||
"trap_true",
|
||||
OPCODE_SIG_X_V,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CALL,
|
||||
"call",
|
||||
OPCODE_SIG_X_S,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CALL_TRUE,
|
||||
"call_true",
|
||||
OPCODE_SIG_X_V_S,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CALL_INDIRECT,
|
||||
"call_indirect",
|
||||
OPCODE_SIG_X_V,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CALL_INDIRECT_TRUE,
|
||||
"call_indirect_true",
|
||||
OPCODE_SIG_X_V_V,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CALL_EXTERN,
|
||||
"call_extern",
|
||||
OPCODE_SIG_X_S,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_RETURN,
|
||||
"return",
|
||||
OPCODE_SIG_X,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_RETURN_TRUE,
|
||||
"return_true",
|
||||
OPCODE_SIG_X_V,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SET_RETURN_ADDRESS,
|
||||
"set_return_address",
|
||||
OPCODE_SIG_X_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_BRANCH,
|
||||
"branch",
|
||||
OPCODE_SIG_X_L,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_BRANCH_TRUE,
|
||||
"branch_true",
|
||||
OPCODE_SIG_X_V_L,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_BRANCH_FALSE,
|
||||
"branch_false",
|
||||
OPCODE_SIG_X_V_L,
|
||||
OPCODE_FLAG_BRANCH);
|
||||
OPCODE_FLAG_BRANCH)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ASSIGN,
|
||||
"assign",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CAST,
|
||||
"cast",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ZERO_EXTEND,
|
||||
"zero_extend",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SIGN_EXTEND,
|
||||
"sign_extend",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_TRUNCATE,
|
||||
"truncate",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CONVERT,
|
||||
"convert",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ROUND,
|
||||
"round",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_CONVERT_I2F,
|
||||
"vector_convert_i2f",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_CONVERT_F2I,
|
||||
"vector_convert_f2i",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_VECTOR_SHL,
|
||||
"load_vector_shl",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_VECTOR_SHR,
|
||||
"load_vector_shr",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_CLOCK,
|
||||
"load_clock",
|
||||
OPCODE_SIG_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_LOCAL,
|
||||
"load_local",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_STORE_LOCAL,
|
||||
"store_local",
|
||||
OPCODE_SIG_X_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_CONTEXT,
|
||||
"load_context",
|
||||
OPCODE_SIG_V_O,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_STORE_CONTEXT,
|
||||
"store_context",
|
||||
OPCODE_SIG_X_O_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD,
|
||||
"load",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_MEMORY);
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_STORE,
|
||||
"store",
|
||||
OPCODE_SIG_X_V_V,
|
||||
OPCODE_FLAG_MEMORY);
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_PREFETCH,
|
||||
"prefetch",
|
||||
OPCODE_SIG_X_V_O,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MAX,
|
||||
"max",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MIN,
|
||||
"min",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SELECT,
|
||||
"select",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_IS_TRUE,
|
||||
"is_true",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_IS_FALSE,
|
||||
"is_false",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_EQ,
|
||||
"compare_eq",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_NE,
|
||||
"compare_ne",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_SLT,
|
||||
"compare_slt",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_SLE,
|
||||
"compare_sle",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_SGT,
|
||||
"compare_sgt",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_SGE,
|
||||
"compare_sge",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_ULT,
|
||||
"compare_ult",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_ULE,
|
||||
"compare_ule",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_UGT,
|
||||
"compare_ugt",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_UGE,
|
||||
"compare_uge",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DID_CARRY,
|
||||
"did_carry",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_PAIRED_PREV);
|
||||
OPCODE_FLAG_PAIRED_PREV)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DID_OVERFLOW,
|
||||
"did_overflow",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_PAIRED_PREV);
|
||||
OPCODE_FLAG_PAIRED_PREV)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DID_SATURATE,
|
||||
"did_saturate",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_PAIRED_PREV);
|
||||
OPCODE_FLAG_PAIRED_PREV)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_COMPARE_EQ,
|
||||
"vector_compare_eq",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_COMPARE_SGT,
|
||||
"vector_compare_sgt",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_COMPARE_SGE,
|
||||
"vector_compare_sge",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_COMPARE_UGT,
|
||||
"vector_compare_ugt",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_COMPARE_UGE,
|
||||
"vector_compare_uge",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ADD,
|
||||
"add",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ADD_CARRY,
|
||||
"add_carry",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_ADD,
|
||||
"vector_add",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SUB,
|
||||
"sub",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL,
|
||||
"mul",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL_HI,
|
||||
"mul_hi",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DIV,
|
||||
"div",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL_ADD,
|
||||
"mul_add",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL_SUB,
|
||||
"mul_sub",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_NEG,
|
||||
"neg",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ABS,
|
||||
"abs",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SQRT,
|
||||
"sqrt",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_RSQRT,
|
||||
"rsqrt",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_POW2,
|
||||
"pow2",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOG2,
|
||||
"log2",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DOT_PRODUCT_3,
|
||||
"dot_product_3",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DOT_PRODUCT_4,
|
||||
"dot_product_4",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_AND,
|
||||
"and",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_OR,
|
||||
"or",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_XOR,
|
||||
"xor",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_COMMUNATIVE);
|
||||
OPCODE_FLAG_COMMUNATIVE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_NOT,
|
||||
"not",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SHL,
|
||||
"shl",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_SHL,
|
||||
"vector_shl",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SHR,
|
||||
"shr",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_SHR,
|
||||
"vector_shr",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SHA,
|
||||
"sha",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_SHA,
|
||||
"vector_sha",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ROTATE_LEFT,
|
||||
"rotate_left",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_BYTE_SWAP,
|
||||
"byte_swap",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_CNTLZ,
|
||||
"cntlz",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_INSERT,
|
||||
"insert",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_EXTRACT,
|
||||
"extract",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SPLAT,
|
||||
"splat",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_PERMUTE,
|
||||
"permute",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_SWIZZLE,
|
||||
"swizzle",
|
||||
OPCODE_SIG_V_V_O,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_PACK,
|
||||
"pack",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_UNPACK,
|
||||
"unpack",
|
||||
OPCODE_SIG_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_COMPARE_EXCHANGE,
|
||||
"compare_exchange",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ATOMIC_EXCHANGE,
|
||||
"atomic_exchange",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_VOLATILE);
|
||||
OPCODE_FLAG_VOLATILE)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ATOMIC_ADD,
|
||||
"atomic_add",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ATOMIC_SUB,
|
||||
"atomic_sub",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0);
|
||||
0)
|
||||
|
|
|
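Note: every DEFINE_OPCODE entry above loses its trailing semicolon. That is consistent with the list being consumed as an X-macro style include, where each consumer defines DEFINE_OPCODE itself and supplies its own separators in the expansion. This is only a guess at the motivation; a self-contained sketch of the general pattern (all names below are made up, not Alloy's):

// One entry per opcode, no trailing punctuation on the entries themselves.
#define DEMO_OPCODE_LIST(X) \
    X(DEMO_NOP, "nop")      \
    X(DEMO_ADD, "add")      \
    X(DEMO_SUB, "sub")

// Expansion 1: the enum. The trailing comma comes from the expansion.
#define DEMO_AS_ENUM(num, name) num,
enum DemoOpcode { DEMO_OPCODE_LIST(DEMO_AS_ENUM) DEMO_OPCODE_COUNT };
#undef DEMO_AS_ENUM

// Expansion 2: a parallel name table built from the same list.
#define DEMO_AS_NAME(num, name) name,
static const char* kDemoOpcodeNames[] = { DEMO_OPCODE_LIST(DEMO_AS_NAME) };
#undef DEMO_AS_NAME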
@@ -1,6 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
  'sources': [
    'block.cc',
    'block.h',
    'hir_builder.cc',
    'hir_builder.h',

@@ -560,6 +560,26 @@ void Value::ByteSwap() {
  }
}

void Value::CountLeadingZeros(const ConstantValue& src) {
  switch (type) {
    case INT8_TYPE:
      constant.i8 = __lzcnt16(src.i8) - 8;
      break;
    case INT16_TYPE:
      constant.i8 = __lzcnt16(src.i16);
      break;
    case INT32_TYPE:
      constant.i8 = __lzcnt(src.i32);
      break;
    case INT64_TYPE:
      constant.i8 = __lzcnt64(src.i64);
      break;
    default:
      XEASSERTALWAYS();
      break;
  }
}

bool Value::Compare(Opcode opcode, Value* other) {
  // TODO(benvanik): big matrix.
  XEASSERTALWAYS();

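Note: in the INT8 case the count is taken with the 16-bit intrinsic and corrected by -8, because the upper byte of the widened value contributes eight leading zeros of its own: src.i8 = 0x10 widens to 0x0010, __lzcnt16 returns 11, and 11 - 8 = 3, the leading-zero count of 0x10 within 8 bits (the promotion sign-extends negative i8 values, so the correction only lines up for non-negative inputs). The __lzcnt* intrinsics are MSVC-specific; a portable sketch of the 8-bit case for other toolchains (helper name is illustrative):

#include <cstdint>

// Portable equivalent of the 8-bit case above.
static inline uint8_t lzcnt8_portable(uint8_t v) {
  uint8_t n = 0;
  for (uint8_t mask = 0x80; mask && !(v & mask); mask >>= 1) {
    ++n;  // count zero bits from the MSB down to the first set bit
  }
  return n;  // returns 8 when v == 0, matching lzcnt semantics
}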
@@ -68,6 +68,10 @@ enum ValueFlags {
  VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set.
};

struct RegAssignment {
  const backend::MachineInfo::RegisterSet* set;
  int32_t index;
};

class Value {
public:

@@ -91,10 +95,7 @@ public:
  TypeName type;

  uint32_t flags;
  struct {
    const backend::MachineInfo::RegisterSet* set;
    int32_t index;
  } reg;
  RegAssignment reg;
  ConstantValue constant;

  Instr* def;

@@ -392,6 +393,7 @@ public:
  void Shr(Value* other);
  void Sha(Value* other);
  void ByteSwap();
  void CountLeadingZeros(const ConstantValue& src);
  bool Compare(Opcode opcode, Value* other);
};

@@ -1 +1 @@
Subproject commit 702d6e6683c322f08a36ea059f6d6f8263b1bd0d
Subproject commit 2d599b3bd64a6d13c8b47a5f7410c67837bfff5d
xenia.gyp
@@ -24,6 +24,18 @@
    'target_arch%': 'x64',
  },

  'conditions': [
    ['OS=="win"', {
      'variables': {
        'move_command%': 'move'
      },
    }, {
      'variables': {
        'move_command%': 'mv'
      },
    }]
  ],

  'target_defaults': {
    'include_dirs': [
      'include/',

@@ -255,6 +267,7 @@
    'include_dirs': [
      '.',
      'src/',
      '<(INTERMEDIATE_DIR)',
    ],

    'includes': [