Merge pull request #1 from benvanik/master

Latest from base
x1nixmzeng 2014-07-05 23:36:05 +01:00
commit 66dc31a01c
233 changed files with 24938 additions and 8621 deletions

View File

@@ -1,8 +1,7 @@
 Xenia - Xbox 360 Emulator Research Project
 ==========================================
 
-Xenia is an experimental emulator for the Xbox 360. It does not run games (yet),
-and if you are unable to understand that please leave now.
+Xenia is an experimental emulator for the Xbox 360. It does not run games (yet).
 
 Pull requests are welcome but the code is in a very high churn state and may not
 be accepted, so ask in IRC before taking on anything big. Contributions are
@@ -64,10 +63,10 @@ doing.
 Fixes and optimizations are always welcome (please!), but in addition to
 that there are some major work areas still untouched:
 
+* Help work through missing functionality/bugs in game [compat](https://github.com/benvanik/xenia/issues?labels=compat)
 * Write an [OpenGL driver](https://github.com/benvanik/xenia/issues/59)
 * Add input drivers for [OSX](https://github.com/benvanik/xenia/issues/61) and [PS4 controllers](https://github.com/benvanik/xenia/issues/60) (or anything else)
 * Start [hacking on audio](https://github.com/benvanik/xenia/issues/62)
-* Support [loading of PIRS files](https://github.com/benvanik/xenia/issues/63)
 * Build a [virtual LIVE service](https://github.com/benvanik/xenia/issues/64)
 
 See more projects [good for contributors](https://github.com/benvanik/xenia/issues?labels=good+for+contributors&page=1&state=open). It's a good idea to ask on IRC/the bugs before beginning work
@@ -85,11 +84,9 @@ Come on people. Jeez.
 ### What kind of machine do I need to run this?
 
-You'll need 64-bit Windows 7 with a processor supporting at least SSE4.
-It's only tested on Windows 8 and that may become a requirement as several of
-the APIs exposed there are beneficial to emulation. In general if you have to
-ask if your machine is good enough to run games at a decent speed the answer is
-no.
+You'll need 64-bit Windows 8 with a processor supporting at least AVX2 - in
+other words, a Haswell. In general if you have to ask if your machine is good
+enough to run games at a decent speed the answer is no.
 
 ### What about Linux/OSX?

View File

@@ -18,6 +18,11 @@
 DECLARE_bool(debug);
 DECLARE_bool(always_disasm);
+DECLARE_bool(validate_hir);
+
+DECLARE_uint64(break_on_instruction);
+DECLARE_uint64(break_on_memory);
+
 namespace alloy {

View File

@@ -21,6 +21,14 @@ using namespace alloy;
 DEFINE_bool(debug, DEFAULT_DEBUG_FLAG,
     "Allow debugging and retain debug information.");
 DEFINE_bool(always_disasm, false,
     "Always add debug info to functions, even when no debugger is attached.");
+DEFINE_bool(validate_hir, false,
+    "Perform validation checks on the HIR during compilation.");
+
+// Breakpoints:
+DEFINE_uint64(break_on_instruction, 0,
+    "int3 before the given guest address is executed.");
+DEFINE_uint64(break_on_memory, 0,
+    "int3 on read/write to the given memory address.");

View File

@@ -18,6 +18,7 @@ using namespace alloy::runtime;
 Backend::Backend(Runtime* runtime) :
     runtime_(runtime) {
+  xe_zero_struct(&machine_info_, sizeof(machine_info_));
 }
 
 Backend::~Backend() {

View File

@ -11,6 +11,7 @@
#define ALLOY_BACKEND_BACKEND_H_ #define ALLOY_BACKEND_BACKEND_H_
#include <alloy/core.h> #include <alloy/core.h>
#include <alloy/backend/machine_info.h>
namespace alloy { namespace runtime { class Runtime; } } namespace alloy { namespace runtime { class Runtime; } }
@ -27,6 +28,7 @@ public:
virtual ~Backend(); virtual ~Backend();
runtime::Runtime* runtime() const { return runtime_; } runtime::Runtime* runtime() const { return runtime_; }
const MachineInfo* machine_info() const { return &machine_info_; }
virtual int Initialize(); virtual int Initialize();
@ -37,6 +39,7 @@ public:
protected: protected:
runtime::Runtime* runtime_; runtime::Runtime* runtime_;
MachineInfo machine_info_;
}; };

View File

@@ -61,7 +61,6 @@ int IVMAssembler::Assemble(
   fn->set_debug_info(debug_info);
 
   TranslationContext ctx;
-  ctx.access_callbacks = backend_->runtime()->access_callbacks();
   ctx.register_count = 0;
   ctx.intcode_count = 0;
   ctx.intcode_arena = &intcode_arena_;
@@ -74,6 +73,19 @@ int IVMAssembler::Assemble(
   builder->ResetLabelTags();
 
   // Function prologue.
+  size_t stack_offset = 0;
+  auto locals = builder->locals();
+  for (auto it = locals.begin(); it != locals.end(); ++it) {
+    auto slot = *it;
+    size_t type_size = GetTypeSize(slot->type);
+    // Align to natural size.
+    stack_offset = XEALIGN(stack_offset, type_size);
+    slot->set_constant((uint32_t)stack_offset);
+    stack_offset += type_size;
+  }
+  // Ensure 16b alignment.
+  stack_offset = XEALIGN(stack_offset, 16);
+  ctx.stack_size = stack_offset;
 
   auto block = builder->first_block();
   while (block) {
@@ -96,7 +108,7 @@ int IVMAssembler::Assemble(
   // Fixup label references.
   LabelRef* label_ref = ctx.label_ref_head;
   while (label_ref) {
-    label_ref->instr->src1_reg = (uint32_t)label_ref->label->tag & ~0x80000000;
+    label_ref->instr->src1_reg = (uint32_t)(intptr_t)label_ref->label->tag & ~0x80000000;
     label_ref = label_ref->next;
   }
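The prologue pass above assigns each local a frame offset at its natural alignment and then rounds the whole frame to 16 bytes so vec128 locals stay aligned. A standalone sketch of that arithmetic, where `Align` is a stand-in for what `XEALIGN` is assumed to do for power-of-two sizes:

```cpp
// Sketch of the natural-alignment slot packing used by the prologue above.
#include <cstdint>
#include <cstdio>

// Assumed XEALIGN equivalent: round value up to a power-of-two alignment.
static size_t Align(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  size_t sizes[] = {1, 8, 4, 16};  // e.g. i8, i64, i32, v128 locals, in order.
  size_t offset = 0;
  for (size_t size : sizes) {
    offset = Align(offset, size);  // Align slot to its own size.
    printf("slot size %zu -> offset %zu\n", size, offset);
    offset += size;
  }
  offset = Align(offset, 16);  // Final 16-byte frame alignment.
  printf("stack_size = %zu\n", offset);  // Offsets 0, 8, 16, 32; total 48.
  return 0;
}
```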

View File

@@ -34,6 +34,20 @@ int IVMBackend::Initialize() {
     return result;
   }
 
+  machine_info_.register_sets[0] = {
+    0,
+    "gpr",
+    MachineInfo::RegisterSet::INT_TYPES,
+    16,
+  };
+  machine_info_.register_sets[1] = {
+    1,
+    "vec",
+    MachineInfo::RegisterSet::FLOAT_TYPES |
+    MachineInfo::RegisterSet::VEC_TYPES,
+    16,
+  };
+
   alloy::tracing::WriteEvent(EventType::Init({
   }));

View File

@@ -23,7 +23,7 @@ using namespace alloy::runtime;
 IVMFunction::IVMFunction(FunctionInfo* symbol_info) :
     register_count_(0), intcode_count_(0), intcodes_(0),
     source_map_count_(0), source_map_(0),
-    GuestFunction(symbol_info) {
+    Function(symbol_info) {
 }
 
 IVMFunction::~IVMFunction() {
@@ -33,6 +33,7 @@ IVMFunction::~IVMFunction() {
 
 void IVMFunction::Setup(TranslationContext& ctx) {
   register_count_ = ctx.register_count;
+  stack_size_ = ctx.stack_size;
   intcode_count_ = ctx.intcode_count;
   intcodes_ = (IntCode*)ctx.intcode_arena->CloneContents();
   source_map_count_ = ctx.source_map_count;
@@ -104,22 +105,25 @@ void IVMFunction::OnBreakpointHit(ThreadState* thread_state, IntCode* i) {
 
 #undef TRACE_SOURCE_OFFSET
 
-int IVMFunction::CallImpl(ThreadState* thread_state) {
+int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) {
   // Setup register file on stack.
   auto stack = (IVMStack*)thread_state->backend_data();
   auto register_file = (Register*)stack->Alloc(register_count_);
+  auto local_stack = (uint8_t*)alloca(stack_size_);
 
   Memory* memory = thread_state->memory();
 
   IntCodeState ics;
   ics.rf = register_file;
+  ics.locals = local_stack;
   ics.context = (uint8_t*)thread_state->raw_context();
   ics.membase = memory->membase();
-  ics.reserve_address = memory->reserve_address();
+  ics.page_table = ics.membase + memory->page_table();
   ics.did_carry = 0;
   ics.did_saturate = 0;
-  ics.access_callbacks = thread_state->runtime()->access_callbacks();
   ics.thread_state = thread_state;
+  ics.return_address = return_address;
+  ics.call_return_address = 0;
 
   volatile int* suspend_flag_address = thread_state->suspend_flag_address();

View File

@@ -21,7 +21,7 @@ namespace backend {
 namespace ivm {
 
-class IVMFunction : public runtime::GuestFunction {
+class IVMFunction : public runtime::Function {
 public:
   IVMFunction(runtime::FunctionInfo* symbol_info);
   virtual ~IVMFunction();
@@ -31,7 +31,8 @@ public:
 protected:
   virtual int AddBreakpointImpl(runtime::Breakpoint* breakpoint);
   virtual int RemoveBreakpointImpl(runtime::Breakpoint* breakpoint);
-  virtual int CallImpl(runtime::ThreadState* thread_state);
+  virtual int CallImpl(runtime::ThreadState* thread_state,
+                       uint64_t return_address);
 
 private:
   IntCode* GetIntCodeAtSourceOffset(uint64_t offset);
@@ -39,6 +40,7 @@ private:
 private:
   size_t register_count_;
+  size_t stack_size_;
   size_t intcode_count_;
   IntCode* intcodes_;
   size_t source_map_count_;

View File

@@ -196,213 +196,6 @@ int DispatchToC(TranslationContext& ctx, Instr* i, IntCodeFn fn) {
   return 0;
 }
 
-uint32_t IntCode_LOAD_REGISTER_I8(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i8 = (int8_t)cbs->read(cbs->context, address);
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I16(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i16 = XESWAP16((int16_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I32(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i32 = XESWAP32((int32_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I64(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i64 = XESWAP64((int64_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-int DispatchRegisterRead(
-    TranslationContext& ctx, Instr* i, RegisterAccessCallbacks* cbs) {
-  static IntCodeFn fns[] = {
-    IntCode_LOAD_REGISTER_I8,
-    IntCode_LOAD_REGISTER_I16,
-    IntCode_LOAD_REGISTER_I32,
-    IntCode_LOAD_REGISTER_I64,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-  };
-  IntCodeFn fn = fns[i->dest->type];
-  XEASSERT(fn != IntCode_INVALID_TYPE);
-  uint32_t dest_reg = AllocDynamicRegister(ctx, i->dest);
-  uint32_t src1_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src1);
-  ctx.intcode_count++;
-  IntCode* ic = ctx.intcode_arena->Alloc<IntCode>();
-  ic->intcode_fn = fn;
-  ic->flags = i->flags;
-  ic->debug_flags = 0;
-  ic->dest_reg = dest_reg;
-  ic->src1_reg = src1_reg;
-  ic->src2_reg = (uint32_t)((uint64_t)cbs);
-  ic->src3_reg = (uint32_t)(((uint64_t)cbs) >> 32);
-  return 0;
-}
-uint32_t IntCode_LOAD_REGISTER_I8_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i8 = (int8_t)cbs->read(cbs->context, address);
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I16_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i16 = XESWAP16((int16_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I32_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i32 = XESWAP32((int32_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I64_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i64 = XESWAP64((int64_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I8(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, ics.rf[i->src2_reg].i8);
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I16(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP16(ics.rf[i->src2_reg].i16));
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I32(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP32(ics.rf[i->src2_reg].i32));
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I64(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP64(ics.rf[i->src2_reg].i64));
-  return IA_NEXT;
-}
-int DispatchRegisterWrite(
-    TranslationContext& ctx, Instr* i, RegisterAccessCallbacks* cbs) {
-  static IntCodeFn fns[] = {
-    IntCode_STORE_REGISTER_I8,
-    IntCode_STORE_REGISTER_I16,
-    IntCode_STORE_REGISTER_I32,
-    IntCode_STORE_REGISTER_I64,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-  };
-  IntCodeFn fn = fns[i->src2.value->type];
-  XEASSERT(fn != IntCode_INVALID_TYPE);
-  uint32_t src1_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src1);
-  uint32_t src2_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src2);
-  ctx.intcode_count++;
-  IntCode* ic = ctx.intcode_arena->Alloc<IntCode>();
-  ic->intcode_fn = fn;
-  ic->flags = i->flags;
-  ic->debug_flags = 0;
-  ic->dest_reg = (uint32_t)(((uint64_t)cbs) >> 32);
-  ic->src1_reg = src1_reg;
-  ic->src2_reg = src2_reg;
-  ic->src3_reg = (uint32_t)((uint64_t)cbs);
-  return 0;
-}
-uint32_t IntCode_STORE_REGISTER_I8_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, ics.rf[i->src2_reg].i8);
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I16_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP16(ics.rf[i->src2_reg].i16));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I32_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP32(ics.rf[i->src2_reg].i32));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I64_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP64(ics.rf[i->src2_reg].i64));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
 uint32_t IntCode_INVALID(IntCodeState& ics, const IntCode* i) {
   XEASSERTALWAYS();
   return IA_NEXT;
@@ -417,7 +210,7 @@ int TranslateInvalid(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_COMMENT(IntCodeState& ics, const IntCode* i) {
   char* value = (char*)(i->src1_reg | ((uint64_t)i->src2_reg << 32));
-  IPRINT("XE[t] :%d: %s\n", ics.thread_state->GetThreadID(), value);
+  IPRINT("XE[t] :%d: %s\n", ics.thread_state->thread_id(), value);
   IFLUSH();
   return IA_NEXT;
 }
@@ -576,11 +369,15 @@ int Translate_TRAP_TRUE(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_CALL_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   FunctionInfo* symbol_info = (FunctionInfo*)ics.rf[reg].u64;
-  Function* fn = NULL;
-  ics.thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
+  Function* fn = symbol_info->function();
+  if (!fn) {
+    ics.thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
+  }
   XEASSERTNOTNULL(fn);
   // TODO(benvanik): proper tail call support, somehow.
-  fn->Call(ics.thread_state);
+  uint64_t return_address =
+      (i->flags & CALL_TAIL) ? ics.return_address : ics.call_return_address;
+  fn->Call(ics.thread_state, return_address);
   if (i->flags & CALL_TAIL) {
     return IA_RETURN;
   }
@@ -645,12 +442,21 @@ int Translate_CALL_TRUE(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_CALL_INDIRECT_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   uint64_t target = ics.rf[reg].u32;
 
+  // Check if return address - if so, return.
+  if (i->flags & CALL_POSSIBLE_RETURN) {
+    if (target == ics.return_address) {
+      return IA_RETURN;
+    }
+  }
+
   // Real call.
   Function* fn = NULL;
   ics.thread_state->runtime()->ResolveFunction(target, &fn);
   XEASSERTNOTNULL(fn);
   // TODO(benvanik): proper tail call support, somehow.
-  fn->Call(ics.thread_state);
+  uint64_t return_address =
+      (i->flags & CALL_TAIL) ? ics.return_address : ics.call_return_address;
+  fn->Call(ics.thread_state, return_address);
   if (i->flags & CALL_TAIL) {
     return IA_RETURN;
   }
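The new CALL_POSSIBLE_RETURN path above compares an indirect branch target against the return address the interpreter recorded on entry: a match unwinds instead of re-resolving a function. A minimal sketch of that convention, with all names hypothetical:

```cpp
// Sketch of the return-address convention: a call records the address the
// callee should "branch" back to; an indirect branch that targets it is a
// return, anything else is a real call that must be resolved.
#include <cstdint>
#include <cstdio>

static const uint32_t kReturnMarker = 0x8A001000;  // Hypothetical return address.

// Returns true if the interpreter should unwind (the IA_RETURN case),
// false if this is a genuine call.
static bool BranchIndirect(uint32_t target, uint32_t return_address) {
  if (target == return_address) {
    return true;  // Matches the recorded return address: unwind.
  }
  printf("resolve and call %08X\n", target);
  return false;
}

int main() {
  BranchIndirect(0x82005000, kReturnMarker);  // Real call.
  if (BranchIndirect(kReturnMarker, kReturnMarker)) {
    printf("unwound to caller\n");
  }
  return 0;
}
```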
@@ -712,6 +518,13 @@ int Translate_CALL_INDIRECT_TRUE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->src1.value->type]);
 }
 
+uint32_t IntCode_CALL_EXTERN(IntCodeState& ics, const IntCode* i) {
+  return IntCode_CALL_XX(ics, i, i->src1_reg);
+}
+int Translate_CALL_EXTERN(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_CALL_EXTERN);
+}
+
 uint32_t IntCode_RETURN(IntCodeState& ics, const IntCode* i) {
   return IA_RETURN;
 }
@@ -768,6 +581,14 @@ int Translate_RETURN_TRUE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->src1.value->type]);
 }
 
+uint32_t IntCode_SET_RETURN_ADDRESS(IntCodeState& ics, const IntCode* i) {
+  ics.call_return_address = ics.rf[i->src1_reg].u32;
+  return IA_NEXT;
+}
+int Translate_SET_RETURN_ADDRESS(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_SET_RETURN_ADDRESS);
+}
+
 uint32_t IntCode_BRANCH_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   return ics.rf[reg].u32;
 }
@@ -1335,34 +1156,116 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_LOAD_CLOCK);
 }
 
+uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn fns[] = {
+    IntCode_LOAD_LOCAL_I8,
+    IntCode_LOAD_LOCAL_I16,
+    IntCode_LOAD_LOCAL_I32,
+    IntCode_LOAD_LOCAL_I64,
+    IntCode_LOAD_LOCAL_F32,
+    IntCode_LOAD_LOCAL_F64,
+    IntCode_LOAD_LOCAL_V128,
+  };
+  return DispatchToC(ctx, i, fns[i->dest->type]);
+}
+
+uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
+  *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i8;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
+  *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i16;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
+  *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i32;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
+  *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i64;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
+  *((float*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f32;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
+  *((double*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f64;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
+  *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].v128;
+  return IA_NEXT;
+}
+int Translate_STORE_LOCAL(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn fns[] = {
+    IntCode_STORE_LOCAL_I8,
+    IntCode_STORE_LOCAL_I16,
+    IntCode_STORE_LOCAL_I32,
+    IntCode_STORE_LOCAL_I64,
+    IntCode_STORE_LOCAL_F32,
+    IntCode_STORE_LOCAL_F64,
+    IntCode_STORE_LOCAL_V128,
+  };
+  return DispatchToC(ctx, i, fns[i->src2.value->type]);
+}
+
 uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I16(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%lld (%.llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
+  DPRINT("%lld (%llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_F32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f32 = *((float*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%e (%.X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
+  DPRINT("%e (%X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_F64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f64 = *((double*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%lle (%.llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
+  DPRINT("%lle (%llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_V128(IntCodeState& ics, const IntCode* i) {
@@ -1388,39 +1291,39 @@ int Translate_LOAD_CONTEXT(TranslationContext& ctx, Instr* i) {
 uint32_t IntCode_STORE_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
   *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8;
-  DPRINT("ctx i8 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
+  DPRINT("ctx i8 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I16(IntCodeState& ics, const IntCode* i) {
   *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16;
-  DPRINT("ctx i16 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
+  DPRINT("ctx i16 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I32(IntCodeState& ics, const IntCode* i) {
   *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32;
-  DPRINT("ctx i32 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
+  DPRINT("ctx i32 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I64(IntCodeState& ics, const IntCode* i) {
   *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64;
-  DPRINT("ctx i64 +%d = %lld (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
+  DPRINT("ctx i64 +%d = %lld (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_F32(IntCodeState& ics, const IntCode* i) {
   *((float*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32;
-  DPRINT("ctx f32 +%d = %e (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
+  DPRINT("ctx f32 +%d = %e (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_F64(IntCodeState& ics, const IntCode* i) {
   *((double*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64;
-  DPRINT("ctx f64 +%d = %lle (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
+  DPRINT("ctx f64 +%d = %lle (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_V128(IntCodeState& ics, const IntCode* i) {
   *((vec128_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].v128;
   DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64,
         VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
-        VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));
+        VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3));
   return IA_NEXT;
 }
 int Translate_STORE_CONTEXT(TranslationContext& ctx, Instr* i) {
@@ -1439,7 +1342,8 @@ int Translate_STORE_CONTEXT(TranslationContext& ctx, Instr* i) {
 uint32_t IntCode_LOAD_I8(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I8_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i8 = ics.thread_state->memory()->LoadI8(address);
+    return IA_NEXT;
   }
   DPRINT("%d (%X) = load.i8 %.8X\n",
          *((int8_t*)(ics.membase + address)),
@@ -1452,7 +1356,9 @@ uint32_t IntCode_LOAD_I8(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I16(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I16_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i16 =
+        XESWAP16(ics.thread_state->memory()->LoadI16(address));
+    return IA_NEXT;
   }
   DPRINT("%d (%X) = load.i16 %.8X\n",
          *((int16_t*)(ics.membase + address)),
@@ -1465,7 +1371,9 @@ uint32_t IntCode_LOAD_I16(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I32_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i32 =
+        XESWAP32(ics.thread_state->memory()->LoadI32(address));
+    return IA_NEXT;
   }
   DFLUSH();
   DPRINT("%d (%X) = load.i32 %.8X\n",
@@ -1479,7 +1387,9 @@ uint32_t IntCode_LOAD_I32(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I64(ics, i);
+    ics.rf[i->dest_reg].i64 =
+        XESWAP64(ics.thread_state->memory()->LoadI64(address));
+    return IA_NEXT;
   }
   DPRINT("%lld (%llX) = load.i64 %.8X\n",
          *((int64_t*)(ics.membase + address)),
@@ -1515,7 +1425,7 @@ uint32_t IntCode_LOAD_V128(IntCodeState& ics, const IntCode* i) {
   for (int n = 0; n < 4; n++) {
     VECI4(dest,n) = *((uint32_t*)(ics.membase + address + n * 4));
   }
-  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load v128 %.8X\n",
+  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
         VECF4(dest,0), VECF4(dest,1), VECF4(dest,2), VECF4(dest,3),
        VECI4(dest,0), VECI4(dest,1), VECI4(dest,2), VECI4(dest,3),
         address);
@@ -1532,90 +1442,95 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
     IntCode_LOAD_F64,
     IntCode_LOAD_V128,
   };
-  if (i->src1.value->IsConstant()) {
-    // Constant address - check register access callbacks.
-    // NOTE: we still will likely want to check on access in debug mode, as
-    // constant propagation may not have happened.
-    uint64_t address = i->src1.value->AsUint64();
-    RegisterAccessCallbacks* cbs = ctx.access_callbacks;
-    while (cbs) {
-      if (cbs->handles(cbs->context, address)) {
-        return DispatchRegisterRead(ctx, i, cbs);
-      }
-      cbs = cbs->next;
-    }
-  }
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
+void MarkPageDirty(IntCodeState& ics, uint32_t address) {
+  // 16KB pages.
+  ics.page_table[(address >> 14) & 0x7FFF] = 1;
+}
+
 uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I8_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI8(address, ics.rf[i->src2_reg].i8);
+    return IA_NEXT;
   }
   DPRINT("store.i8 %.8X = %d (%X)\n",
-         address, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].i8);
+         address, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
   DFLUSH();
   *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I16_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI16(address,
+                                         XESWAP16(ics.rf[i->src2_reg].i16));
+    return IA_NEXT;
  }
   DPRINT("store.i16 %.8X = %d (%X)\n",
-         address, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].i16);
+         address, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
   DFLUSH();
   *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I32_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI32(address,
+                                         XESWAP32(ics.rf[i->src2_reg].i32));
+    return IA_NEXT;
   }
   DPRINT("store.i32 %.8X = %d (%X)\n",
-         address, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].i32);
+         address, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
   DFLUSH();
   *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I64_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI64(address,
+                                         XESWAP64(ics.rf[i->src2_reg].i64));
+    return IA_NEXT;
   }
   DPRINT("store.i64 %.8X = %lld (%llX)\n",
-         address, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].i64);
+         address, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
   DFLUSH();
   *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   DPRINT("store.f32 %.8X = %e (%X)\n",
-         address, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].i32);
+         address, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
   DFLUSH();
   *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   DPRINT("store.f64 %.8X = %lle (%llX)\n",
-         address, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].i64);
+         address, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
   DFLUSH();
   *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
-  DPRINT("store v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
+  DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
         address,
         VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
         VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3));
   DFLUSH();
   *((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128;
+  MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 int Translate_STORE(TranslationContext& ctx, Instr* i) {
@@ -1628,19 +1543,6 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) {
     IntCode_STORE_F64,
     IntCode_STORE_V128,
   };
-  if (i->src1.value->IsConstant()) {
-    // Constant address - check register access callbacks.
-    // NOTE: we still will likely want to check on access in debug mode, as
-    // constant propagation may not have happened.
-    uint64_t address = i->src1.value->AsUint64();
-    RegisterAccessCallbacks* cbs = ctx.access_callbacks;
-    while (cbs) {
-      if (cbs->handles(cbs->context, address)) {
-        return DispatchRegisterWrite(ctx, i, cbs);
-      }
-      cbs = cbs->next;
-    }
-  }
   return DispatchToC(ctx, i, fns[i->src2.value->type]);
 }
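MarkPageDirty above implements write tracking with 16KB pages: `address >> 14` selects the page and `& 0x7FFF` keeps the index inside a 32K-entry table, one byte per page (enough for a 512MB range). A standalone check of the math:

```cpp
// Sketch verifying the 16KB page-dirty bookkeeping used by the stores above.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> page_table(0x8000, 0);  // 32K entries, one byte each.
  auto mark_dirty = [&](uint32_t address) {
    page_table[(address >> 14) & 0x7FFF] = 1;  // 16KB pages: 2^14 bytes.
  };
  mark_dirty(0x00000000);  // Page 0.
  mark_dirty(0x00003FFF);  // Still page 0 (last byte of the first 16KB).
  mark_dirty(0x00004000);  // Page 1.
  printf("page0=%d page1=%d page2=%d\n",
         page_table[0], page_table[1], page_table[2]);  // Prints: 1 1 0.
  return 0;
}
```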
@@ -2093,19 +1995,19 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_DID_SATURATE);
 }
 
-#define VECTOR_COMPARER(type, value, count, op) \
+#define VECTOR_COMPARER(type, value, dest_value, count, op) \
   const vec128_t& src1 = ics.rf[i->src1_reg].v128; \
   const vec128_t& src2 = ics.rf[i->src2_reg].v128; \
   vec128_t& dest = ics.rf[i->dest_reg].v128; \
   for (int n = 0; n < count; n++) { \
-    dest.value[n] = (type)src1.value[n] op (type)src2.value[n]; \
+    dest.dest_value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? 0xFFFFFFFF : 0; \
   } \
   return IA_NEXT;
 
-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, ==) };
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_EQ_I8,
@@ -2119,10 +2021,10 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGT_I8,
@@ -2136,10 +2038,10 @@ int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGE_I8,
@@ -2153,10 +2055,10 @@ int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGT_I8,
@@ -2170,10 +2072,10 @@ int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_UGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGE_I8,
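The extra dest_value parameter lets float compares write their all-ones/zero lane masks through the integer i4 view instead of storing a float into f4, matching AltiVec-style compare semantics. A scalar sketch of the lane behavior:

```cpp
// Sketch: SIMD-style compare producing 0xFFFFFFFF/0 lane masks, as the
// reworked VECTOR_COMPARER macro now does for float lanes via the i4 view.
#include <cstdint>
#include <cstdio>

int main() {
  float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float b[4] = {1.0f, 5.0f, 3.0f, 0.0f};
  uint32_t mask[4];
  for (int n = 0; n < 4; n++) {
    mask[n] = (a[n] == b[n]) ? 0xFFFFFFFF : 0;  // Integer mask, not 1.0f.
  }
  // Prints: FFFFFFFF 00000000 FFFFFFFF 00000000
  printf("%08X %08X %08X %08X\n", mask[0], mask[1], mask[2], mask[3]);
  return 0;
}
```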
@@ -2466,9 +2368,9 @@ uint32_t IntCode_SUB_I16_I16(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_SUB_I32_I32(IntCodeState& ics, const IntCode* i) {
   int32_t a = ics.rf[i->src1_reg].i32; int32_t b = ics.rf[i->src2_reg].i32;
   if (i->flags == ARITHMETIC_SET_CARRY) {
-    ics.did_carry = a < ~b;
+    ics.did_carry = SUB_DID_CARRY(a, b);
   }
   ics.rf[i->dest_reg].i32 = a - b;
   return IA_NEXT;
 }
 uint32_t IntCode_SUB_I64_I64(IntCodeState& ics, const IntCode* i) {
@@ -3605,17 +3507,17 @@ int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
 uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 int Translate_EXTRACT(TranslationContext& ctx, Instr* i) {
@@ -3817,6 +3719,7 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
+  dest.ix = dest.iy = 0;
   dest.iz =
       ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
       DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
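Since only the low two dwords receive packed halves, the added line clears the untouched high words so stale register data cannot leak through. For reference, a simplified sketch of the packing itself, where ToHalf is a truncating stand-in for DirectX::PackedVector::XMConvertFloatToHalf (no rounding, infinity, or denormal handling):

```cpp
// Simplified sketch of packing two floats into one dword as float16 halves,
// as PACK_FLOAT16_4 does per pair of source lanes.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t ToHalf(float f) {
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));
  uint16_t sign = (bits >> 16) & 0x8000;
  int32_t exp = ((bits >> 23) & 0xFF) - 127 + 15;  // Rebias exponent.
  uint16_t mant = (bits >> 13) & 0x3FF;            // Truncate mantissa.
  if (exp <= 0) return sign;                       // Flush small values to zero.
  if (exp >= 31) return sign | 0x7C00;             // Clamp large values to infinity.
  return sign | ((uint16_t)exp << 10) | mant;
}

int main() {
  float x = 1.0f, y = -2.5f;
  uint32_t iz = ((uint32_t)ToHalf(x) << 16) | ToHalf(y);
  printf("iz = %08X\n", iz);  // 1.0f -> 0x3C00, -2.5f -> 0xC100 => 3C00C100.
  return 0;
}
```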
@@ -4009,8 +3912,10 @@ static const TranslateFn dispatch_table[] = {
   Translate_CALL_TRUE,
   Translate_CALL_INDIRECT,
   Translate_CALL_INDIRECT_TRUE,
+  Translate_CALL_EXTERN,
   Translate_RETURN,
   Translate_RETURN_TRUE,
+  Translate_SET_RETURN_ADDRESS,
   Translate_BRANCH,
   Translate_BRANCH_TRUE,
@@ -4031,6 +3936,9 @@ static const TranslateFn dispatch_table[] = {
 
   Translate_LOAD_CLOCK,
 
+  Translate_LOAD_LOCAL,
+  Translate_STORE_LOCAL,
+
   Translate_LOAD_CONTEXT,
   Translate_STORE_CONTEXT,

View File

@ -14,7 +14,6 @@
#include <alloy/hir/instr.h> #include <alloy/hir/instr.h>
#include <alloy/hir/opcodes.h> #include <alloy/hir/opcodes.h>
#include <alloy/runtime/register_access.h>
namespace alloy { namespace runtime { class ThreadState; } } namespace alloy { namespace runtime { class ThreadState; } }
@ -41,13 +40,15 @@ typedef union {
typedef struct { typedef struct {
Register* rf; Register* rf;
uint8_t* locals;
uint8_t* context; uint8_t* context;
uint8_t* membase; uint8_t* membase;
uint32_t* reserve_address; uint8_t* page_table;
int8_t did_carry; int8_t did_carry;
int8_t did_saturate; int8_t did_saturate;
runtime::RegisterAccessCallbacks* access_callbacks;
runtime::ThreadState* thread_state; runtime::ThreadState* thread_state;
uint64_t return_address;
uint64_t call_return_address;
} IntCodeState; } IntCodeState;
@ -95,8 +96,6 @@ typedef struct SourceMapEntry_s {
typedef struct { typedef struct {
runtime::RegisterAccessCallbacks* access_callbacks;
uint32_t register_count; uint32_t register_count;
size_t intcode_count; size_t intcode_count;
Arena* intcode_arena; Arena* intcode_arena;
@ -104,6 +103,7 @@ typedef struct {
Arena* source_map_arena; Arena* source_map_arena;
Arena* scratch_arena; Arena* scratch_arena;
LabelRef* label_ref_head; LabelRef* label_ref_head;
size_t stack_size;
} TranslationContext; } TranslationContext;
@@ -32,17 +32,17 @@ public:
  ALLOY_BACKEND_IVM_ASSEMBLER_DEINIT = ALLOY_BACKEND_IVM_ASSEMBLER | (2),
};
-typedef struct {
+typedef struct Init_s {
  static const uint32_t event_type = ALLOY_BACKEND_IVM_INIT;
} Init;
-typedef struct {
+typedef struct Deinit_s {
  static const uint32_t event_type = ALLOY_BACKEND_IVM_DEINIT;
} Deinit;
-typedef struct {
+typedef struct AssemblerInit_s {
  static const uint32_t event_type = ALLOY_BACKEND_IVM_ASSEMBLER_INIT;
} AssemblerInit;
-typedef struct {
+typedef struct AssemblerDeinit_s {
  static const uint32_t event_type = ALLOY_BACKEND_IVM_ASSEMBLER_DEINIT;
} AssemblerDeinit;
};
@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_BACKEND_MACHINE_INFO_H_
#define ALLOY_BACKEND_MACHINE_INFO_H_
#include <alloy/core.h>
namespace alloy {
namespace backend {
struct MachineInfo {
struct RegisterSet {
enum Types {
INT_TYPES = (1 << 1),
FLOAT_TYPES = (1 << 2),
VEC_TYPES = (1 << 3),
};
uint8_t id;
char name[4];
uint32_t types;
uint32_t count;
} register_sets[8];
};
} // namespace backend
} // namespace alloy
#endif // ALLOY_BACKEND_MACHINE_INFO_H_
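A consumer such as a register allocator can size its pools from this table; a minimal sketch, assuming unused trailing register_sets entries are zero-initialized:

void CountHostRegs(const backend::MachineInfo& info,
                   uint32_t& int_regs, uint32_t& vec_regs) {
  int_regs = vec_regs = 0;
  for (size_t n = 0; n < 8; n++) {  // register_sets[8] above
    const auto& set = info.register_sets[n];
    if (!set.count) continue;  // assumption: empty slots are zeroed
    if (set.types & backend::MachineInfo::RegisterSet::INT_TYPES) {
      int_regs += set.count;
    } else {
      vec_regs += set.count;  // FLOAT_TYPES and/or VEC_TYPES
    }
  }
}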
@@ -5,6 +5,7 @@
    'assembler.h',
    'backend.cc',
    'backend.h',
+    'machine_info.h',
    'tracing.h',
  ],
File diff suppressed because it is too large
@ -1,71 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/backend/x64/lowering/lowering_sequences.h>
using namespace alloy;
using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
LoweringTable::LoweringTable(X64Backend* backend) :
backend_(backend) {
xe_zero_struct(lookup_, sizeof(lookup_));
}
LoweringTable::~LoweringTable() {
for (size_t n = 0; n < XECOUNT(lookup_); n++) {
auto entry = lookup_[n];
while (entry) {
auto next = entry->next;
delete entry;
entry = next;
}
}
}
int LoweringTable::Initialize() {
RegisterSequences(this);
return 0;
}
void LoweringTable::AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn) {
auto existing_entry = lookup_[starting_opcode];
auto new_entry = new sequence_fn_entry_t();
new_entry->fn = fn;
new_entry->next = existing_entry;
lookup_[starting_opcode] = new_entry;
}
int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) {
// Process instructions.
auto instr = block->instr_head;
while (instr) {
bool processed = false;
auto entry = lookup_[instr->opcode->num];
while (entry) {
if ((*entry->fn)(e, instr)) {
processed = true;
break;
}
entry = entry->next;
}
if (!processed) {
// No sequence found!
XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
return 1;
instr = e.Advance(instr);
}
}
return 0;
}
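For context on what is being deleted here: a sequence function claimed the instruction(s) it emitted and advanced the cursor itself, returning false to let the next registered handler in the chain try. A hypothetical handler under that contract (the name is illustrative, not from the tree):

bool EmitAddSequence(X64Emitter& e, hir::Instr*& instr) {
  if (instr->opcode->num != OPCODE_ADD) return false;
  // ... emit x64 for instr here ...
  instr = e.Advance(instr);  // consume the instruction
  return true;
}
// Registered at startup: table->AddSequence(OPCODE_ADD, EmitAddSequence);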
@ -1,58 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
#include <alloy/core.h>
#include <alloy/hir/hir_builder.h>
namespace alloy {
namespace backend {
namespace x64 {
class X64Backend;
class X64Emitter;
namespace lowering {
class LoweringTable {
public:
LoweringTable(X64Backend* backend);
~LoweringTable();
int Initialize();
int ProcessBlock(X64Emitter& e, hir::Block* block);
public:
typedef bool(*sequence_fn_t)(X64Emitter& e, hir::Instr*& instr);
void AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn);
private:
class sequence_fn_entry_t {
public:
sequence_fn_t fn;
sequence_fn_entry_t* next;
};
// NOTE: this class is shared by multiple threads and is not thread safe.
// Do not modify anything after init.
X64Backend* backend_;
sequence_fn_entry_t* lookup_[hir::__OPCODE_MAX_VALUE];
};
} // namespace lowering
} // namespace x64
} // namespace backend
} // namespace alloy
#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
@ -1,9 +0,0 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
'sources': [
'lowering_sequences.cc',
'lowering_sequences.h',
'lowering_table.cc',
'lowering_table.h',
],
}
@@ -12,9 +12,12 @@
    'x64_emitter.h',
    'x64_function.cc',
    'x64_function.h',
-  ],
-  'includes': [
-    'lowering/sources.gypi',
+    'x64_sequence.inl',
+    'x64_sequences.cc',
+    'x64_sequences.h',
+    'x64_thunk_emitter.cc',
+    'x64_thunk_emitter.h',
+    'x64_tracers.cc',
+    'x64_tracers.h',
  ],
}
@@ -32,17 +32,17 @@ public:
  ALLOY_BACKEND_X64_ASSEMBLER_DEINIT = ALLOY_BACKEND_X64_ASSEMBLER | (2),
};
-typedef struct {
+typedef struct Init_s {
  static const uint32_t event_type = ALLOY_BACKEND_X64_INIT;
} Init;
-typedef struct {
+typedef struct Deinit_s {
  static const uint32_t event_type = ALLOY_BACKEND_X64_DEINIT;
} Deinit;
-typedef struct {
+typedef struct AssemblerInit_s {
  static const uint32_t event_type = ALLOY_BACKEND_X64_ASSEMBLER_INIT;
} AssemblerInit;
-typedef struct {
+typedef struct AssemblerDeinit_s {
  static const uint32_t event_type = ALLOY_BACKEND_X64_ASSEMBLER_DEINIT;
} AssemblerDeinit;
};
@@ -30,7 +30,7 @@
using namespace alloy::runtime;
X64Assembler::X64Assembler(X64Backend* backend) :
    x64_backend_(backend),
-    emitter_(0),
+    emitter_(0), allocator_(0),
    Assembler(backend) {
}
@@ -39,6 +39,7 @@ X64Assembler::~X64Assembler() {
  }));
  delete emitter_;
+  delete allocator_;
}
int X64Assembler::Initialize() {
@@ -47,8 +48,8 @@ int X64Assembler::Initialize() {
    return result;
  }
-  emitter_ = new X64Emitter(x64_backend_,
-      new XbyakAllocator());
+  allocator_ = new XbyakAllocator();
+  emitter_ = new X64Emitter(x64_backend_, allocator_);
  alloy::tracing::WriteEvent(EventType::AssemblerInit({
  }));
@@ -65,6 +66,8 @@ int X64Assembler::Assemble(
    FunctionInfo* symbol_info, HIRBuilder* builder,
    uint32_t debug_info_flags, DebugInfo* debug_info,
    Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
  int result = 0;
  // Lower HIR -> x64.
@@ -82,6 +85,7 @@ int X64Assembler::Assemble(
    string_buffer_.Reset();
  }
+  {
    X64Function* fn = new X64Function(symbol_info);
    fn->set_debug_info(debug_info);
    fn->Setup(machine_code, code_size);
@@ -89,6 +93,7 @@ int X64Assembler::Assemble(
    *out_function = fn;
    result = 0;
+  }
XECLEANUP:
  Reset();
@@ -21,6 +21,7 @@
namespace x64 {
class X64Backend;
class X64Emitter;
+class XbyakAllocator;
class X64Assembler : public Assembler {
@@ -45,6 +46,7 @@ private:
private:
  X64Backend* x64_backend_;
  X64Emitter* emitter_;
+  XbyakAllocator* allocator_;
  StringBuffer string_buffer_;
};
@@ -12,25 +12,23 @@
#include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_assembler.h>
#include <alloy/backend/x64/x64_code_cache.h>
-#include <alloy/backend/x64/lowering/lowering_table.h>
-#include <alloy/backend/x64/lowering/lowering_sequences.h>
+#include <alloy/backend/x64/x64_sequences.h>
+#include <alloy/backend/x64/x64_thunk_emitter.h>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::backend::x64;
-using namespace alloy::backend::x64::lowering;
using namespace alloy::runtime;
X64Backend::X64Backend(Runtime* runtime) :
-    code_cache_(0), lowering_table_(0),
+    code_cache_(0),
    Backend(runtime) {
}
X64Backend::~X64Backend() {
  alloy::tracing::WriteEvent(EventType::Deinit({
  }));
-  delete lowering_table_;
  delete code_cache_;
}
@@ -40,14 +38,34 @@ int X64Backend::Initialize() {
    return result;
  }
+  RegisterSequences();
+  machine_info_.register_sets[0] = {
+    0,
+    "gpr",
+    MachineInfo::RegisterSet::INT_TYPES,
+    X64Emitter::GPR_COUNT,
+  };
+  machine_info_.register_sets[1] = {
+    1,
+    "xmm",
+    MachineInfo::RegisterSet::FLOAT_TYPES |
+    MachineInfo::RegisterSet::VEC_TYPES,
+    X64Emitter::XMM_COUNT,
+  };
  code_cache_ = new X64CodeCache();
  result = code_cache_->Initialize();
  if (result) {
    return result;
  }
-  lowering_table_ = new LoweringTable(this);
-  RegisterSequences(lowering_table_);
+  auto allocator = new XbyakAllocator();
+  auto thunk_emitter = new X64ThunkEmitter(this, allocator);
+  host_to_guest_thunk_ = thunk_emitter->EmitHostToGuestThunk();
+  guest_to_host_thunk_ = thunk_emitter->EmitGuestToHostThunk();
+  delete thunk_emitter;
+  delete allocator;
  alloy::tracing::WriteEvent(EventType::Init({
  }));
@@ -20,19 +20,22 @@ namespace backend {
namespace x64 {
class X64CodeCache;
-namespace lowering { class LoweringTable; }
#define ALLOY_HAS_X64_BACKEND 1
+typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
+typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
class X64Backend : public Backend {
public:
  X64Backend(runtime::Runtime* runtime);
  virtual ~X64Backend();
  X64CodeCache* code_cache() const { return code_cache_; }
-  lowering::LoweringTable* lowering_table() const { return lowering_table_; }
+  HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
+  GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
  virtual int Initialize();
@@ -40,7 +43,8 @@ public:
private:
  X64CodeCache* code_cache_;
-  lowering::LoweringTable* lowering_table_;
+  HostToGuestThunk host_to_guest_thunk_;
+  GuestToHostThunk guest_to_host_thunk_;
};
@@ -34,14 +34,14 @@ public:
  const static uint32_t ESTIMATED_FN_SIZE = 512;
  // Size of unwind info per function.
  // TODO(benvanik): move this to emitter.
-  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1);
+  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2);
  void* fn_table_handle;
  RUNTIME_FUNCTION* fn_table;
  uint32_t fn_table_count;
  uint32_t fn_table_capacity;
-  void AddTableEntry(uint8_t* code, size_t code_size);
+  void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size);
};
@@ -73,7 +73,10 @@ int X64CodeCache::Initialize() {
  return 0;
}
-void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
+void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
+                              size_t stack_size) {
+  SCOPE_profile_cpu_f("alloy");
  // Add unwind info into the allocation size. Keep things 16b aligned.
  code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);
@@ -101,7 +104,7 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
  active_chunk_->offset += code_size;
  // Add entry to fn table.
-  active_chunk_->AddTableEntry(final_address, code_size);
+  active_chunk_->AddTableEntry(final_address, code_size, stack_size);
  UnlockMutex(lock_);
@@ -156,6 +159,27 @@ typedef enum _UNWIND_OP_CODES {
  UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
  UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
} UNWIND_CODE_OPS;
class UNWIND_REGISTER {
public:
enum _ {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
};
};
typedef union _UNWIND_CODE {
  struct {
@@ -183,7 +207,8 @@ typedef struct _UNWIND_INFO {
} UNWIND_INFO, *PUNWIND_INFO;
}  // namespace
-void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
+void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
+                                 size_t stack_size) {
  // NOTE: we assume a chunk lock.
  if (fn_table_count + 1 > fn_table_capacity) {
@@ -213,26 +238,57 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
  size_t unwind_info_offset = offset;
  offset += UNWIND_INFO_SIZE;
-  // TODO(benvanik): take as parameters?
-  bool has_prolog = true;
-  uint8_t prolog_size = 4;
-  uint8_t stack_bytes = 64;
+  if (!stack_size) {
+    uint8_t prolog_size = 0;
    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
    unwind_info->Version = 1;
    unwind_info->Flags = 0;
-  unwind_info->SizeOfProlog = has_prolog ? prolog_size : 0;
-  unwind_info->CountOfCodes = has_prolog ? 1 : 0;
+    unwind_info->SizeOfProlog = 0;
+    unwind_info->CountOfCodes = 0;
+    unwind_info->FrameRegister = 0;
+    unwind_info->FrameOffset = 0;
+  } else if (stack_size <= 128) {
+    uint8_t prolog_size = 4;
+    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
+    unwind_info->Version = 1;
+    unwind_info->Flags = 0;
+    unwind_info->SizeOfProlog = prolog_size;
+    unwind_info->CountOfCodes = 1;
    unwind_info->FrameRegister = 0;
    unwind_info->FrameOffset = 0;
    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
-  auto& code_0 = unwind_info->UnwindCode[0];
-  code_0.CodeOffset = 4;  // end of instruction + 1 == offset of next instruction
-  code_0.UnwindOp = UWOP_ALLOC_SMALL;
-  code_0.OpInfo = stack_bytes / 8 - 1;
-  XEASSERT(stack_bytes < 128);
+    size_t co = 0;
+    auto& unwind_code = unwind_info->UnwindCode[co++];
+    unwind_code.CodeOffset = 14;  // end of instruction + 1 == offset of next instruction
+    unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
+    unwind_code.OpInfo = stack_size / 8 - 1;
+  } else {
+    // TODO(benvanik): take as parameters?
+    uint8_t prolog_size = 7;
+    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
+    unwind_info->Version = 1;
+    unwind_info->Flags = 0;
+    unwind_info->SizeOfProlog = prolog_size;
+    unwind_info->CountOfCodes = 3;
+    unwind_info->FrameRegister = 0;
+    unwind_info->FrameOffset = 0;
+    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+    size_t co = 0;
+    auto& unwind_code = unwind_info->UnwindCode[co++];
+    unwind_code.CodeOffset = 7;  // end of instruction + 1 == offset of next instruction
+    unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
+    unwind_code.OpInfo = 0;
+    unwind_code = unwind_info->UnwindCode[co++];
+    unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
+  }
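Worked examples of the encodings chosen above, following the MSDN rules cited in the code:

//   stack_size = 64:  UWOP_ALLOC_SMALL, OpInfo = 64/8 - 1 = 7
//                     (ALLOC_SMALL encodes 8..128 bytes in 8-byte steps)
//   stack_size = 120: UWOP_ALLOC_SMALL, OpInfo = 120/8 - 1 = 14
//   stack_size = 512: UWOP_ALLOC_LARGE, OpInfo = 0, and the following
//                     16-bit slot holds 512/8 = 64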
  // Add entry.
  auto& fn_entry = fn_table[fn_table_count++];
@@ -30,7 +30,7 @@ public:
  // TODO(benvanik): keep track of code blocks
  // TODO(benvanik): padding/guards/etc
-  void* PlaceCode(void* machine_code, size_t code_size);
+  void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size);
private:
  const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
@@ -11,9 +11,14 @@
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_code_cache.h>
-#include <alloy/backend/x64/lowering/lowering_table.h>
-#include <alloy/backend/x64/lowering/lowering_sequences.h>
+#include <alloy/backend/x64/x64_function.h>
+#include <alloy/backend/x64/x64_sequences.h>
+#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/hir/hir_builder.h>
#include <alloy/runtime/debug_info.h>
+#include <alloy/runtime/runtime.h>
+#include <alloy/runtime/symbol_info.h>
+#include <alloy/runtime/thread_state.h>
using namespace alloy;
using namespace alloy::backend;
@@ -30,22 +35,38 @@ namespace x64 {
static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
+static const size_t STASH_OFFSET = 32;
+// If we are running with tracing on we have to store the EFLAGS in the stack,
+// otherwise our calls out to C to print will clear it before DID_CARRY/etc
+// can get the value.
+#define STORE_EFLAGS 1
}  // namespace x64
}  // namespace backend
}  // namespace alloy
+const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
+  Operand::RBX,
+  Operand::R12, Operand::R13, Operand::R14, Operand::R15,
+};
+const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = {
+  6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
+    runtime_(backend->runtime()),
    backend_(backend),
    code_cache_(backend->code_cache()),
    allocator_(allocator),
    current_instr_(0),
    CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
-  xe_zero_struct(&reg_state_, sizeof(reg_state_));
}
X64Emitter::~X64Emitter() {
-  delete allocator_;
}
int X64Emitter::Initialize() {
@@ -56,6 +77,8 @@ int X64Emitter::Emit(
    HIRBuilder* builder,
    uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
    void*& out_code_address, size_t& out_code_size) {
+  SCOPE_profile_cpu_f("alloy");
  // Reset.
  if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
    source_map_count_ = 0;
@@ -63,14 +86,15 @@ int X64Emitter::Emit(
  }
  // Fill the generator with code.
-  int result = Emit(builder);
+  size_t stack_size = 0;
+  int result = Emit(builder, stack_size);
  if (result) {
    return result;
  }
  // Copy the final code to the cache and relocate it.
  out_code_size = getSize();
-  out_code_address = Emplace(code_cache_);
+  out_code_address = Emplace(stack_size);
  // Stash source map.
  if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
@@ -82,13 +106,13 @@ int X64Emitter::Emit(
  return 0;
}
-void* X64Emitter::Emplace(X64CodeCache* code_cache) {
+void* X64Emitter::Emplace(size_t stack_size) {
  // To avoid changing xbyak, we do a switcharoo here.
  // top_ points to the Xbyak buffer, and since we are in AutoGrow mode
  // it has pending relocations. We copy the top_ to our buffer, swap the
  // pointer, relocate, then return the original scratch pointer for use.
  uint8_t* old_address = top_;
-  void* new_address = code_cache->PlaceCode(top_, size_);
+  void* new_address = code_cache_->PlaceCode(top_, size_, stack_size);
  top_ = (uint8_t*)new_address;
  ready();
  top_ = old_address;
@@ -96,17 +120,22 @@ void* X64Emitter::Emplace(X64CodeCache* code_cache) {
  return new_address;
}
-int X64Emitter::Emit(HIRBuilder* builder) {
-  // These are the registers we will not be using. All others are fair game.
-  const uint32_t reserved_regs =
-      GetRegBit(rax) |
-      GetRegBit(rcx) |
-      GetRegBit(rdx) |
-      GetRegBit(rsp) |
-      GetRegBit(rbp) |
-      GetRegBit(rsi) |
-      GetRegBit(rdi) |
-      GetRegBit(xmm0);
+int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
+  // Calculate stack size. We need to align things to their natural sizes.
+  // This could be much better (sort by type/etc).
+  auto locals = builder->locals();
+  size_t stack_offset = StackLayout::GUEST_STACK_SIZE;
+  for (auto it = locals.begin(); it != locals.end(); ++it) {
+    auto slot = *it;
+    size_t type_size = GetTypeSize(slot->type);
+    // Align to natural size.
+    stack_offset = XEALIGN(stack_offset, type_size);
+    slot->set_constant((uint32_t)stack_offset);
+    stack_offset += type_size;
+  }
+  // Ensure 16b alignment.
+  stack_offset -= StackLayout::GUEST_STACK_SIZE;
+  stack_offset = XEALIGN(stack_offset, 16);
  // Function prolog.
  // Must be 16b aligned.
@@ -120,20 +149,18 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  // X64CodeCache, which dynamically generates exception information.
  // Adding or changing anything here must be matched!
  const bool emit_prolog = true;
-  const size_t stack_size = 64;
+  const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
+  XEASSERT((stack_size + 8) % 16 == 0);
+  out_stack_size = stack_size;
+  stack_size_ = stack_size;
  if (emit_prolog) {
-    mov(qword[rsp + 16], rdx);
-    mov(qword[rsp + 8], rcx);
-    sub(rsp, stack_size);
-    mov(qword[rsp + 8 * 0], rbx);
-    mov(qword[rsp + 8 * 1], r12);
-    mov(qword[rsp + 8 * 2], r13);
-    mov(qword[rsp + 8 * 3], r14);
-    mov(qword[rsp + 8 * 4], r15);
+    sub(rsp, (uint32_t)stack_size);
+    mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
+    mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
+    mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
+    mov(rdx, qword[rcx + 8]);  // membase
  }
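The assert above encodes the Win64 ABI invariant: at function entry rsp % 16 == 8 because the call pushed an 8-byte return address, so any frame with (stack_size + 8) % 16 == 0 re-aligns the body to 16 bytes. A worked example, assuming StackLayout::GUEST_STACK_SIZE is 104 (the constant is defined elsewhere in this commit):

//   no locals:          stack_size = 104 + 0,  (104 + 8) % 16 == 0
//   12 bytes of locals: stack_offset rounds up to 16 via XEALIGN, so
//                       stack_size = 104 + 16, (120 + 8) % 16 == 0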
-  auto lowering_table = backend_->lowering_table();
  // Body.
  auto block = builder->first_block();
  while (block) {
@@ -144,17 +171,17 @@ int X64Emitter::Emit(HIRBuilder* builder) {
      label = label->next;
    }
-    // Reset reg allocation state.
-    // If we start keeping regs across blocks this needs to change.
-    // We mark a few active so that the allocator doesn't use them.
-    reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
-    // Add instructions.
-    // The table will process sequences of instructions to (try to)
-    // generate optimal code.
-    current_instr_ = block->instr_head;
-    if (lowering_table->ProcessBlock(*this, block)) {
-      return 1;
-    }
+    // Process instructions.
+    const Instr* instr = block->instr_head;
+    while (instr) {
+      const Instr* new_tail = instr;
+      if (!SelectSequence(*this, instr, &new_tail)) {
+        // No sequence found!
+        XEASSERTALWAYS();
+        XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
+        break;
+      }
+      instr = new_tail;
+    }
    block = block->next;
@@ -163,12 +190,8 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  // Function epilog.
  L("epilog");
  if (emit_prolog) {
-    mov(rbx, qword[rsp + 8 * 0]);
-    mov(r12, qword[rsp + 8 * 1]);
-    mov(r13, qword[rsp + 8 * 2]);
-    mov(r14, qword[rsp + 8 * 3]);
-    mov(r15, qword[rsp + 8 * 4]);
-    add(rsp, stack_size);
+    mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
+    add(rsp, (uint32_t)stack_size);
  }
  ret();
@@ -183,181 +206,398 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  return 0;
}
void X64Emitter::EvictStaleRegs() {
// NOTE: if we are getting called it's because we *need* a register.
// We must get rid of something.
uint32_t current_ordinal = current_instr_->ordinal;
// Remove any register with no more uses.
uint32_t new_live_regs = 0;
for (size_t n = 0; n < 32; n++) {
uint32_t bit = 1 << n;
if (bit & reg_state_.active_regs) {
// Register is active and cannot be freed.
new_live_regs |= bit;
continue;
}
if (!(bit & reg_state_.live_regs)) {
// Register is not alive - nothing to do.
continue;
}
// Register is live, not active. Check and see if we get rid of it.
auto v = reg_state_.reg_values[n];
if (v->last_use->ordinal < current_ordinal) {
reg_state_.reg_values[n] = NULL;
}
}
// Hrm. We have spilled.
if (reg_state_.live_regs == new_live_regs) {
XEASSERTALWAYS();
}
reg_state_.live_regs = new_live_regs;
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags) {
// If the value is already in a register, use it.
if (v0->reg != -1) {
// Already in a register. Mark active and return.
v0_idx = v0->reg;
reg_state_.active_regs |= 1 << v0_idx;
return;
}
uint32_t avail_regs = 0;
if (IsIntType(v0->type)) {
if (v0_flags & REG_ABCD) {
avail_regs = B00001111;
} else {
avail_regs = 0xFFFF;
}
} else {
avail_regs = 0xFFFF0000;
}
uint32_t free_regs = avail_regs & ~reg_state_.live_regs;
if (!free_regs) {
// Need to evict something.
EvictStaleRegs();
}
// Find the first available.
// We start from the MSB so that we get the non-rNx regs that are often
// in short supply.
_BitScanReverse((DWORD*)&v0_idx, free_regs);
reg_state_.active_regs |= 1 << v0_idx;
reg_state_.live_regs |= 1 << v0_idx;
v0->reg = v0_idx;
reg_state_.reg_values[v0_idx] = v0;
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
Value* v2, uint32_t& v2_idx, uint32_t v2_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
bool need_v2 = v2->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (!need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
Value* v3, uint32_t& v3_idx, uint32_t v3_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
bool need_v2 = v2->reg == -1;
bool need_v3 = v3->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (!need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
if (!need_v3) {
FindFreeRegs(v3, v3_idx, v3_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
if (need_v3) {
FindFreeRegs(v3, v3_idx, v3_flags);
}
}
Instr* X64Emitter::Advance(Instr* i) {
auto next = i->next;
current_instr_ = next;
return next;
}
-void X64Emitter::MarkSourceOffset(Instr* i) {
+void X64Emitter::MarkSourceOffset(const Instr* i) {
  auto entry = source_map_arena_.Alloc<SourceMapEntry>();
  entry->source_offset = i->src1.offset;
  entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
  entry->code_offset = getSize();
  source_map_count_++;
}
void X64Emitter::DebugBreak() {
// TODO(benvanik): notify debugger.
db(0xCC);
}
void X64Emitter::Trap() {
// 0x0FE00014 is a 'debug print' where r3 = buffer r4 = length
// TODO(benvanik): post software interrupt to debugger.
db(0xCC);
}
void X64Emitter::UnimplementedInstr(const hir::Instr* i) {
// TODO(benvanik): notify debugger.
db(0xCC);
XEASSERTALWAYS();
}
// Total size of ResolveFunctionSymbol call site in bytes.
// Used to overwrite it with nops as needed.
const size_t TOTAL_RESOLVE_SIZE = 27;
const size_t ASM_OFFSET = 2 + 2 + 8 + 2 + 8;
// Length Assembly Byte Sequence
// =================================================================================
// 2 bytes 66 NOP 66 90H
// 3 bytes NOP DWORD ptr [EAX] 0F 1F 00H
// 4 bytes NOP DWORD ptr [EAX + 00H] 0F 1F 40 00H
// 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H] 0F 1F 44 00 00H
// 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H] 66 0F 1F 44 00 00H
// 7 bytes NOP DWORD ptr [EAX + 00000000H] 0F 1F 80 00 00 00 00H
// 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H] 0F 1F 84 00 00 00 00 00H
// 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00H
uint64_t ResolveFunctionSymbol(void* raw_context, uint64_t symbol_info_ptr) {
// TODO(benvanik): generate this thunk at runtime? or a shim?
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
// Resolve function. This will demand compile as required.
Function* fn = NULL;
thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
XEASSERTNOTNULL(fn);
auto x64_fn = static_cast<X64Function*>(fn);
uint64_t addr = reinterpret_cast<uint64_t>(x64_fn->machine_code());
// Overwrite the call site.
// The return address points to ReloadRCX work after the call.
uint64_t return_address = reinterpret_cast<uint64_t>(_ReturnAddress());
#pragma pack(push, 1)
struct Asm {
uint16_t mov_rax;
uint64_t rax_constant;
uint16_t mov_rdx;
uint64_t rdx_constant;
uint16_t call_rax;
uint8_t mov_rcx[5];
};
#pragma pack(pop)
Asm* code = reinterpret_cast<Asm*>(return_address - ASM_OFFSET);
code->rax_constant = addr;
code->call_rax = 0x9066;
// We need to return the target in rax so that it gets called.
return addr;
}
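The byte arithmetic can be checked against the Asm overlay: each mov of a 64-bit immediate is a 2-byte opcode plus an 8-byte constant, call rax is 2 bytes, and the trailing rcx reload is 5. Illustrative asserts (not part of the commit):

static_assert(TOTAL_RESOLVE_SIZE == 10 + 10 + 2 + 5,
              "mov rax + mov rdx + call rax + reload rcx");
// _ReturnAddress() points just past call rax; backing up
// 2 + 8 + 2 + 8 + 2 == 22 bytes lands on the first mov.
static_assert(ASM_OFFSET == 22, "return address back to mov rax");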
void X64Emitter::Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info) {
auto fn = reinterpret_cast<X64Function*>(symbol_info->function());
// Resolve address to the function to call and store in rax.
// TODO(benvanik): caching/etc. For now this makes debugging easier.
if (fn) {
mov(rax, reinterpret_cast<uint64_t>(fn->machine_code()));
} else {
size_t start = getSize();
// 2b + 8b constant
mov(rax, reinterpret_cast<uint64_t>(ResolveFunctionSymbol));
// 2b + 8b constant
mov(rdx, reinterpret_cast<uint64_t>(symbol_info));
// 2b
call(rax);
// 5b
ReloadECX();
size_t total_size = getSize() - start;
XEASSERT(total_size == TOTAL_RESOLVE_SIZE);
// EDX overwritten, don't bother reloading.
}
// Actually jump/call to rax.
if (instr->flags & CALL_TAIL) {
// Pass the caller's return address over.
mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
add(rsp, static_cast<uint32_t>(stack_size()));
jmp(rax);
} else {
// Return address is from the previous SET_RETURN_ADDRESS.
mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
call(rax);
}
}
uint64_t ResolveFunctionAddress(void* raw_context, uint64_t target_address) {
// TODO(benvanik): generate this thunk at runtime? or a shim?
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
// TODO(benvanik): required?
target_address &= 0xFFFFFFFF;
Function* fn = NULL;
thread_state->runtime()->ResolveFunction(target_address, &fn);
XEASSERTNOTNULL(fn);
auto x64_fn = static_cast<X64Function*>(fn);
return reinterpret_cast<uint64_t>(x64_fn->machine_code());
}
void X64Emitter::CallIndirect(const hir::Instr* instr, const Reg64& reg) {
// Check if return.
if (instr->flags & CALL_POSSIBLE_RETURN) {
cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
je("epilog", CodeGenerator::T_NEAR);
}
// Resolve address to the function to call and store in rax.
// TODO(benvanik): caching/etc. For now this makes debugging easier.
if (reg.getIdx() != rdx.getIdx()) {
mov(rdx, reg);
}
CallNative(ResolveFunctionAddress);
// Actually jump/call to rax.
if (instr->flags & CALL_TAIL) {
// Pass the caller's return address over.
mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
add(rsp, static_cast<uint32_t>(stack_size()));
jmp(rax);
} else {
// Return address is from the previous SET_RETURN_ADDRESS.
mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
call(rax);
}
}
uint64_t UndefinedCallExtern(void* raw_context, uint64_t symbol_info_ptr) {
auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
XELOGW("undefined extern call to %.8X %s",
symbol_info->address(),
symbol_info->name());
return 0;
}
void X64Emitter::CallExtern(const hir::Instr* instr, const FunctionInfo* symbol_info) {
XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN);
if (!symbol_info->extern_handler()) {
CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(symbol_info));
} else {
// rcx = context
// rdx = target host function
// r8 = arg0
// r9 = arg1
mov(rdx, reinterpret_cast<uint64_t>(symbol_info->extern_handler()));
mov(r8, reinterpret_cast<uint64_t>(symbol_info->extern_arg0()));
mov(r9, reinterpret_cast<uint64_t>(symbol_info->extern_arg1()));
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
call(rax);
ReloadECX();
ReloadEDX();
// rax = host return
}
}
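For reference, the register assignments above imply a host handler of roughly this shape (the signature is inferred from the thunk convention, not spelled out in the diff):

// uint64_t MyExternHandler(void* raw_context, uint64_t arg0, uint64_t arg1);
// arg0/arg1 arrive from extern_arg0()/extern_arg1() via r8/r9, and the
// returned value comes back to guest code in rax.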
void X64Emitter::CallNative(void* fn) {
mov(rax, reinterpret_cast<uint64_t>(fn));
call(rax);
ReloadECX();
ReloadEDX();
}
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context)) {
mov(rax, reinterpret_cast<uint64_t>(fn));
call(rax);
ReloadECX();
ReloadEDX();
}
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)) {
mov(rax, reinterpret_cast<uint64_t>(fn));
call(rax);
ReloadECX();
ReloadEDX();
}
void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) {
mov(rdx, arg0);
mov(rax, reinterpret_cast<uint64_t>(fn));
call(rax);
ReloadECX();
ReloadEDX();
}
void X64Emitter::CallNativeSafe(void* fn) {
// rcx = context
// rdx = target host function
// r8 = arg0
// r9 = arg1
mov(rdx, reinterpret_cast<uint64_t>(fn));
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
call(rax);
ReloadECX();
ReloadEDX();
// rax = host return
}
void X64Emitter::SetReturnAddress(uint64_t value) {
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], value);
}
void X64Emitter::ReloadECX() {
mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
}
void X64Emitter::ReloadEDX() {
mov(rdx, qword[rcx + 8]); // membase
}
void X64Emitter::LoadEflags() {
#if STORE_EFLAGS
mov(eax, dword[rsp + STASH_OFFSET]);
push(rax);
popf();
#else
// EFLAGS already present.
#endif // STORE_EFLAGS
}
void X64Emitter::StoreEflags() {
#if STORE_EFLAGS
pushf();
pop(qword[rsp + STASH_OFFSET]);
#else
// EFLAGS should have CA set?
// (so long as we don't fuck with it)
#endif // STORE_EFLAGS
}
uint32_t X64Emitter::page_table_address() const {
uint64_t addr = runtime_->memory()->page_table();
return static_cast<uint32_t>(addr);
}
bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
return true;
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32bits.
return true;
}
return false;
}
void X64Emitter::MovMem64(const RegExp& addr, uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
mov(qword[addr], v);
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32bits.
mov(qword[addr], v);
} else if (!(v >> 32)) {
// All high bits are zero. It'd be nice if we had a way to load a 32bit
// immediate without sign extending!
// TODO(benvanik): this is super common, find a better way.
mov(dword[addr], static_cast<uint32_t>(v));
mov(dword[addr + 4], 0);
} else {
// 64bit number that needs double movs.
mov(dword[addr], static_cast<uint32_t>(v));
mov(dword[addr + 4], static_cast<uint32_t>(v >> 32));
}
}
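Worked examples of which path each constant takes:

//   0x0000000012345678 -> single qword mov (top 33 bits are zero)
//   0xFFFFFFFF80000000 -> single qword mov (sign-extension recreates it)
//   0x0000000080000000 -> two dword movs (a qword mov would sign-extend)
//   0x1122334455667788 -> two dword movs, low half then high half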
Address X64Emitter::GetXmmConstPtr(XmmConst id) {
static const vec128_t xmm_consts[] = {
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
/* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f),
/* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
/* XMMMaskX16Y16 */ vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, 0x00000000u),
/* XMMFlipX16Y16 */ vec128i(0x00008000u, 0x00000000u, 0x00000000u, 0x00000000u),
/* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
/* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
/* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, 0xFFFFFF0Cu, 0xFFFFFF0Fu),
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
/* XMM255 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
/* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u, 0x80808080u, 0x80808080u),
/* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u, 0x80008000u, 0x80008000u),
/* XMMSignMaskI32 */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskF32 */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
};
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
// prevent this move.
// TODO(benvanik): move to predictable location in PPCContext? could then
// just do rcx-relative addressing with no rax overwriting.
mov(rax, (uint64_t)&xmm_consts[id]);
return ptr[rax];
}
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
// http://www.agner.org/optimize/optimizing_assembly.pdf
// 13.4 Generating constants
if (!v.low && !v.high) {
// 0000...
vpxor(dest, dest);
} else if (v.low == ~0ull && v.high == ~0ull) {
// 1111...
vpcmpeqb(dest, dest);
} else {
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
MovMem64(rsp + STASH_OFFSET, v.low);
MovMem64(rsp + STASH_OFFSET + 8, v.high);
vmovdqa(dest, ptr[rsp + STASH_OFFSET]);
}
}
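Both fast paths are the classic constant-generation idioms from the Agner Fog guide cited above: xor-with-self yields all zero bits and compare-equal-with-self yields all ones, so neither needs a memory load. Dispatch examples:

//   vec128i(0, 0, 0, 0)          -> vpxor dest, dest     (all zeros)
//   vec128i(~0u, ~0u, ~0u, ~0u)  -> vpcmpeqb dest, dest  (all ones)
//   anything else                -> staged on the stack, then vmovdqa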
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) {
union {
float f;
uint32_t i;
} x = { v };
if (!v) {
// 0
vpxor(dest, dest);
} else if (x.i == ~0UL) {
// 1111...
vpcmpeqb(dest, dest);
} else {
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
mov(eax, x.i);
vmovd(dest, eax);
}
}
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) {
union {
double d;
uint64_t i;
} x = { v };
if (!v) {
// 0
vpxor(dest, dest);
} else if (x.i == ~0ULL) {
// 1111...
vpcmpeqb(dest, dest);
} else {
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
mov(rax, x.i);
vmovq(dest, rax);
}
}
Address X64Emitter::StashXmm(const Xmm& r) {
auto addr = ptr[rsp + STASH_OFFSET];
vmovups(addr, r);
return addr;
}
Address X64Emitter::StashXmm(const vec128_t& v) {
auto addr = ptr[rsp + STASH_OFFSET];
LoadConstantXmm(xmm0, v);
vmovups(addr, xmm0);
return addr;
}
@@ -19,6 +19,9 @@
XEDECLARECLASS2(alloy, hir, HIRBuilder);
XEDECLARECLASS2(alloy, hir, Instr);
XEDECLARECLASS2(alloy, runtime, DebugInfo);
+XEDECLARECLASS2(alloy, runtime, FunctionInfo);
+XEDECLARECLASS2(alloy, runtime, Runtime);
+XEDECLARECLASS2(alloy, runtime, SymbolInfo);
namespace alloy {
namespace backend {
@@ -32,6 +35,35 @@ enum RegisterFlags {
  REG_ABCD = (1 << 1),
};
enum XmmConst {
XMMZero = 0,
XMMOne,
XMMNegativeOne,
XMMMaskX16Y16,
XMMFlipX16Y16,
XMMFixX16Y16,
XMMNormalizeX16Y16,
XMM0001,
XMM3301,
XMMSignMaskPS,
XMMSignMaskPD,
XMMAbsMaskPS,
XMMAbsMaskPD,
XMMByteSwapMask,
XMMPermuteControl15,
XMMPackD3DCOLOR,
XMMUnpackD3DCOLOR,
XMMOneOver255,
XMMShiftMaskPS,
XMMShiftByteMask,
XMMUnsignedDwordMax,
XMM255,
XMMSignMaskI8,
XMMSignMaskI16,
XMMSignMaskI32,
XMMSignMaskF32,
};
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
class XbyakAllocator : public Xbyak::Allocator {
public:
@@ -43,6 +75,9 @@ public:
  X64Emitter(X64Backend* backend, XbyakAllocator* allocator);
  virtual ~X64Emitter();
+  runtime::Runtime* runtime() const { return runtime_; }
+  X64Backend* backend() const { return backend_; }
  int Initialize();
  int Emit(hir::HIRBuilder* builder,
@@ -50,118 +85,93 @@ public:
           void*& out_code_address, size_t& out_code_size);
public:
-  template<typename V0>
-  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
-    uint32_t v0_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags);
-    SetupReg(v0_idx, r0);
-  }
-  template<typename V0, typename V1>
-  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
-               hir::Value* v1, V1& r1, uint32_t r1_flags) {
-    uint32_t v0_idx, v1_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
-  }
-  template<typename V0, typename V1, typename V2>
-  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
-               hir::Value* v1, V1& r1, uint32_t r1_flags,
-               hir::Value* v2, V2& r2, uint32_t r2_flags) {
-    uint32_t v0_idx, v1_idx, v2_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags,
-                 v2, v2_idx, r2_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
-    SetupReg(v2_idx, r2);
-  }
-  template<typename V0, typename V1, typename V2, typename V3>
-  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
-               hir::Value* v1, V1& r1, uint32_t r1_flags,
-               hir::Value* v2, V2& r2, uint32_t r2_flags,
-               hir::Value* v3, V3& r3, uint32_t r3_flags) {
-    uint32_t v0_idx, v1_idx, v2_idx, v3_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags,
-                 v2, v2_idx, r2_flags,
-                 v3, v3_idx, r3_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
-    SetupReg(v2_idx, r2);
-    SetupReg(v3_idx, r3);
-  }
-  template<typename V0>
-  void EndOp(V0& r0) {
-    reg_state_.active_regs = reg_state_.active_regs ^ GetRegBit(r0);
-  }
-  template<typename V0, typename V1>
-  void EndOp(V0& r0, V1& r1) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1));
-  }
-  template<typename V0, typename V1, typename V2>
-  void EndOp(V0& r0, V1& r1, V2& r2) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2));
-  }
-  template<typename V0, typename V1, typename V2, typename V3>
-  void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3));
-  }
-  void EvictStaleRegs();
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
-                    hir::Value* v3, uint32_t& v3_idx, uint32_t v3_flags);
-  static void SetupReg(uint32_t idx, Xbyak::Reg8& r) { r = Xbyak::Reg8(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg16& r) { r = Xbyak::Reg16(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg32& r) { r = Xbyak::Reg32(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg64& r) { r = Xbyak::Reg64(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Xmm& r) { r = Xbyak::Xmm(idx - 16); }
-  static uint32_t GetRegBit(const Xbyak::Reg8& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg16& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg32& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); }
-  hir::Instr* Advance(hir::Instr* i);
-  void MarkSourceOffset(hir::Instr* i);
-private:
-  void* Emplace(X64CodeCache* code_cache);
-  int Emit(hir::HIRBuilder* builder);
-private:
+  // Reserved: rsp
+  // Scratch: rax/rcx/rdx
+  //     xmm0-2 (could be only xmm0 with some trickery)
+  // Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?)
+  //     xmm6-xmm15 (save to get xmm3-xmm5)
+  static const int GPR_COUNT = 5;
+  static const int XMM_COUNT = 10;
+  static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg8(idx);
+  }
+  static void SetupReg(const hir::Value* v, Xbyak::Reg16& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg16(idx);
+  }
+  static void SetupReg(const hir::Value* v, Xbyak::Reg32& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg32(idx);
+  }
+  static void SetupReg(const hir::Value* v, Xbyak::Reg64& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg64(idx);
+  }
+  static void SetupReg(const hir::Value* v, Xbyak::Xmm& r) {
+    auto idx = xmm_reg_map_[v->reg.index];
+    r = Xbyak::Xmm(idx);
+  }
+  void MarkSourceOffset(const hir::Instr* i);
+  void DebugBreak();
+  void Trap();
+  void UnimplementedInstr(const hir::Instr* i);
+  void UnimplementedExtern(const hir::Instr* i);
+  void Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info);
+  void CallIndirect(const hir::Instr* instr, const Xbyak::Reg64& reg);
+  void CallExtern(const hir::Instr* instr, const runtime::FunctionInfo* symbol_info);
+  void CallNative(void* fn);
+  void CallNative(uint64_t(*fn)(void* raw_context));
+  void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0));
+  void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0);
+  void CallNativeSafe(void* fn);
+  void SetReturnAddress(uint64_t value);
+  void ReloadECX();
+  void ReloadEDX();
+  // TODO(benvanik): Label for epilog (don't use strings).
+  void LoadEflags();
+  void StoreEflags();
+  uint32_t page_table_address() const;
+  // Moves a 64bit immediate into memory.
+  bool ConstantFitsIn32Reg(uint64_t v);
+  void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
+  Xbyak::Address GetXmmConstPtr(XmmConst id);
+  void LoadConstantXmm(Xbyak::Xmm dest, float v);
+  void LoadConstantXmm(Xbyak::Xmm dest, double v);
+  void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
+  Xbyak::Address StashXmm(const Xbyak::Xmm& r);
+  Xbyak::Address StashXmm(const vec128_t& v);
+  size_t stack_size() const { return stack_size_; }
+protected:
+  void* Emplace(size_t stack_size);
+  int Emit(hir::HIRBuilder* builder, size_t& out_stack_size);
+protected:
+  runtime::Runtime* runtime_;
  X64Backend* backend_;
  X64CodeCache* code_cache_;
  XbyakAllocator* allocator_;
-  struct {
-    // Registers currently active within a begin/end op block. These
-    // cannot be reused.
-    uint32_t active_regs;
-    // Registers with values in them.
-    uint32_t live_regs;
-    // Current register values.
-    hir::Value* reg_values[32];
-  } reg_state_;
  hir::Instr* current_instr_;
  size_t source_map_count_;
  Arena source_map_arena_;
+  size_t stack_size_;
+  static const uint32_t gpr_reg_map_[GPR_COUNT];
+  static const uint32_t xmm_reg_map_[XMM_COUNT];
}; };
@@ -10,6 +10,7 @@
#include <alloy/backend/x64/x64_function.h>
#include <alloy/backend/x64/tracing.h>
+#include <alloy/backend/x64/x64_backend.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/thread_state.h>
@@ -21,7 +22,7 @@ using namespace alloy::runtime;
X64Function::X64Function(FunctionInfo* symbol_info) :
    machine_code_(NULL), code_size_(0),
-    GuestFunction(symbol_info) {
+    Function(symbol_info) {
}
X64Function::~X64Function() {
@@ -41,8 +42,12 @@ int X64Function::RemoveBreakpointImpl(Breakpoint* breakpoint) {
  return 0;
}
-int X64Function::CallImpl(ThreadState* thread_state) {
-  typedef void(*call_t)(void* raw_context, uint8_t* membase);
-  ((call_t)machine_code_)(thread_state->raw_context(), thread_state->memory()->membase());
+int X64Function::CallImpl(ThreadState* thread_state, uint64_t return_address) {
+  auto backend = (X64Backend*)thread_state->runtime()->backend();
+  auto thunk = backend->host_to_guest_thunk();
+  thunk(
+      machine_code_,
+      thread_state->raw_context(),
+      (void*)return_address);
  return 0;
}
@@ -20,17 +20,21 @@ namespace backend {
namespace x64 {
-class X64Function : public runtime::GuestFunction {
+class X64Function : public runtime::Function {
public:
  X64Function(runtime::FunctionInfo* symbol_info);
  virtual ~X64Function();
+  void* machine_code() const { return machine_code_; }
+  size_t code_size() const { return code_size_; }
  void Setup(void* machine_code, size_t code_size);
protected:
  virtual int AddBreakpointImpl(runtime::Breakpoint* breakpoint);
  virtual int RemoveBreakpointImpl(runtime::Breakpoint* breakpoint);
-  virtual int CallImpl(runtime::ThreadState* thread_state);
+  virtual int CallImpl(runtime::ThreadState* thread_state,
+                       uint64_t return_address);
private:
  void* machine_code_;
@@ -0,0 +1,744 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
namespace {
enum KeyType {
KEY_TYPE_X = OPCODE_SIG_TYPE_X,
KEY_TYPE_L = OPCODE_SIG_TYPE_L,
KEY_TYPE_O = OPCODE_SIG_TYPE_O,
KEY_TYPE_S = OPCODE_SIG_TYPE_S,
KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
};
#pragma pack(push, 1)
union InstrKey {
struct {
uint32_t opcode : 8;
uint32_t dest : 5;
uint32_t src1 : 5;
uint32_t src2 : 5;
uint32_t src3 : 5;
uint32_t reserved : 4;
};
uint32_t value;
operator uint32_t() const {
return value;
}
InstrKey() : value(0) {}
InstrKey(uint32_t v) : value(v) {}
InstrKey(const Instr* i) : value(0) {
opcode = i->opcode->num;
uint32_t sig = i->opcode->signature;
dest = GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
if (src1 == OPCODE_SIG_TYPE_V) {
src1 += i->src1.value->type;
}
src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
if (src2 == OPCODE_SIG_TYPE_V) {
src2 += i->src2.value->type;
}
src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
if (src3 == OPCODE_SIG_TYPE_V) {
src3 += i->src3.value->type;
}
}
template <Opcode OPCODE,
KeyType DEST = KEY_TYPE_X,
KeyType SRC1 = KEY_TYPE_X,
KeyType SRC2 = KEY_TYPE_X,
KeyType SRC3 = KEY_TYPE_X>
struct Construct {
static const uint32_t value =
(OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
};
};
#pragma pack(pop)
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
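The packed key lets sequence lookup match an opcode plus its operand types with a single integer compare. A compile-time example for a binary i32 op (the opcode choice is illustrative):

// value == OPCODE_ADD | (KEY_TYPE_V_I32 << 8) | (KEY_TYPE_V_I32 << 13) |
//          (KEY_TYPE_V_I32 << 18); src3 stays KEY_TYPE_X (0).
typedef InstrKey::Construct<OPCODE_ADD,
    KEY_TYPE_V_I32, KEY_TYPE_V_I32, KEY_TYPE_V_I32> AddI32Key;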
template <typename... Ts>
struct CombinedStruct;
template <>
struct CombinedStruct<> {};
template <typename T, typename... Ts>
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};
struct OpBase {};
template <typename T, KeyType KEY_TYPE>
struct Op : OpBase {
static const KeyType key_type = KEY_TYPE;
};
struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
protected:
template <typename T, KeyType KEY_TYPE> friend struct Op;
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
void Load(const Instr::Op& op) {}
};
struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
uint64_t value;
protected:
template <typename T, KeyType KEY_TYPE> friend struct Op;
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
void Load(const Instr::Op& op) {
this->value = op.offset;
}
};
struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
FunctionInfo* value;
protected:
template <typename T, KeyType KEY_TYPE> friend struct Op;
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
bool Load(const Instr::Op& op) {
this->value = op.symbol_info;
return true;
}
};
struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
hir::Label* value;
protected:
template <typename T, KeyType KEY_TYPE> friend struct Op;
template <hir::Opcode OPCODE, typename... Ts> friend struct I;
void Load(const Instr::Op& op) {
this->value = op.label;
}
};
template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE, int TAG = -1>
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE, TAG>, KEY_TYPE> {
typedef REG_TYPE reg_type;
static const int tag = TAG;
const Value* value;
bool is_constant;
virtual bool ConstantFitsIn32Reg() const { return true; }
const REG_TYPE& reg() const {
XEASSERT(!is_constant);
return reg_;
}
operator const REG_TYPE&() const {
return reg();
}
bool IsEqual(const T& b) const {
if (is_constant && b.is_constant) {
return reinterpret_cast<const T*>(this)->constant() == b.constant();
} else if (!is_constant && !b.is_constant) {
return reg_.getIdx() == b.reg_.getIdx();
} else {
return false;
}
}
bool IsEqual(const Xbyak::Reg& b) const {
if (is_constant) {
return false;
} else if (!is_constant) {
return reg_.getIdx() == b.getIdx();
} else {
return false;
}
}
bool operator== (const T& b) const {
return IsEqual(b);
}
bool operator!= (const T& b) const {
return !IsEqual(b);
}
bool operator== (const Xbyak::Reg& b) const {
return IsEqual(b);
}
bool operator!= (const Xbyak::Reg& b) const {
return !IsEqual(b);
}
void Load(const Instr::Op& op) {
const Value* value = op.value;
this->value = value;
is_constant = value->IsConstant();
if (!is_constant) {
X64Emitter::SetupReg(value, reg_);
}
}
protected:
REG_TYPE reg_;
};
template <int TAG = -1>
struct I8 : ValueOp<I8<TAG>, KEY_TYPE_V_I8, Reg8, int8_t, TAG> {
const int8_t constant() const {
XEASSERT(is_constant);
return value->constant.i8;
}
};
template <int TAG = -1>
struct I16 : ValueOp<I16<TAG>, KEY_TYPE_V_I16, Reg16, int16_t, TAG> {
const int16_t constant() const {
XEASSERT(is_constant);
return value->constant.i16;
}
};
template <int TAG = -1>
struct I32 : ValueOp<I32<TAG>, KEY_TYPE_V_I32, Reg32, int32_t, TAG> {
const int32_t constant() const {
XEASSERT(is_constant);
return value->constant.i32;
}
};
template <int TAG = -1>
struct I64 : ValueOp<I64<TAG>, KEY_TYPE_V_I64, Reg64, int64_t, TAG> {
const int64_t constant() const {
XEASSERT(is_constant);
return value->constant.i64;
}
bool ConstantFitsIn32Reg() const override {
int64_t v = value->constant.i64;
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
return true;
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32 bits.
return true;
}
return false;
}
};
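// Editor note: ConstantFitsIn32Reg above mirrors x86-64's sign-extended imm32
// encoding rule. For example, 0x000000007FFFFFFF and 0xFFFFFFFF80000000 both
// encode directly as an immediate operand, while 0x0000000080000000 does not
// and must be materialized through a temporary register first.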
template <int TAG = -1>
struct F32 : ValueOp<F32<TAG>, KEY_TYPE_V_F32, Xmm, float, TAG> {
const float constant() const {
XEASSERT(is_constant);
return value->constant.f32;
}
};
template <int TAG = -1>
struct F64 : ValueOp<F64<TAG>, KEY_TYPE_V_F64, Xmm, double, TAG> {
const double constant() const {
XEASSERT(is_constant);
return value->constant.f64;
}
};
template <int TAG = -1>
struct V128 : ValueOp<V128<TAG>, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> {
const vec128_t& constant() const {
XEASSERT(is_constant);
return value->constant.v128;
}
};
struct TagTable {
struct {
bool valid;
Instr::Op op;
} table[16];
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_X>::type* = nullptr>
bool CheckTag(const Instr::Op& op) {
return true;
}
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_L>::type* = nullptr>
bool CheckTag(const Instr::Op& op) {
return true;
}
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_O>::type* = nullptr>
bool CheckTag(const Instr::Op& op) {
return true;
}
template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_S>::type* = nullptr>
bool CheckTag(const Instr::Op& op) {
return true;
}
template <typename T, typename std::enable_if<T::key_type >= KEY_TYPE_V_I8>::type* = nullptr>
bool CheckTag(const Instr::Op& op) {
const Value* value = op.value;
if (T::tag == -1) {
return true;
}
if (table[T::tag].valid &&
table[T::tag].op.value != value) {
return false;
}
table[T::tag].valid = true;
table[T::tag].op.value = (Value*)value;
return true;
}
};
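// Editor note: the tag table is what lets a pattern require that two operands
// be the *same* SSA value. A hypothetical (unregistered) pattern such as
//   I<OPCODE_ADD, I32<>, I32<TAG0>, I32<TAG0>>
// matches "v2 = add v1, v1" but not "v2 = add v0, v1": the first CheckTag
// records v1 under TAG0 and the second rejects any other value.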
template <typename DEST, typename... Tf>
struct DestField;
template <typename DEST>
struct DestField<DEST> {
DEST dest;
protected:
bool LoadDest(const Instr* i, TagTable& tag_table) {
Instr::Op op;
op.value = i->dest;
if (tag_table.CheckTag<DEST>(op)) {
dest.Load(op);
return true;
}
return false;
}
};
template <>
struct DestField<VoidOp> {
protected:
bool LoadDest(const Instr* i, TagTable& tag_table) {
return true;
}
};
template <hir::Opcode OPCODE, typename... Ts>
struct I;
template <hir::Opcode OPCODE, typename DEST>
struct I<OPCODE, DEST> : DestField<DEST> {
static const hir::Opcode opcode = OPCODE;
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type>::value;
static const KeyType dest_type = DEST::key_type;
const Instr* instr;
protected:
template <typename... Ti> friend struct SequenceFields;
bool Load(const Instr* i, TagTable& tag_table) {
if (InstrKey(i).value == key &&
LoadDest(i, tag_table)) {
instr = i;
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
static const hir::Opcode opcode = OPCODE;
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
const Instr* instr;
SRC1 src1;
protected:
template <typename... Ti> friend struct SequenceFields;
bool Load(const Instr* i, TagTable& tag_table) {
if (InstrKey(i).value == key &&
LoadDest(i, tag_table) &&
tag_table.CheckTag<SRC1>(i->src1)) {
instr = i;
src1.Load(i->src1);
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
static const hir::Opcode opcode = OPCODE;
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
static const KeyType src2_type = SRC2::key_type;
const Instr* instr;
SRC1 src1;
SRC2 src2;
protected:
template <typename... Ti> friend struct SequenceFields;
bool Load(const Instr* i, TagTable& tag_table) {
if (InstrKey(i).value == key &&
LoadDest(i, tag_table) &&
tag_table.CheckTag<SRC1>(i->src1) &&
tag_table.CheckTag<SRC2>(i->src2)) {
instr = i;
src1.Load(i->src1);
src2.Load(i->src2);
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2, typename SRC3>
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
static const hir::Opcode opcode = OPCODE;
static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type, SRC3::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
static const KeyType src2_type = SRC2::key_type;
static const KeyType src3_type = SRC3::key_type;
const Instr* instr;
SRC1 src1;
SRC2 src2;
SRC3 src3;
protected:
template <typename... Ti> friend struct SequenceFields;
bool Load(const Instr* i, TagTable& tag_table) {
if (InstrKey(i).value == key &&
LoadDest(i, tag_table) &&
tag_table.CheckTag<SRC1>(i->src1) &&
tag_table.CheckTag<SRC2>(i->src2) &&
tag_table.CheckTag<SRC3>(i->src3)) {
instr = i;
src1.Load(i->src1);
src2.Load(i->src2);
src3.Load(i->src3);
return true;
}
return false;
}
};
template <typename... Ti>
struct SequenceFields;
template <typename I1>
struct SequenceFields<I1> {
I1 i1;
typedef I1 I1Type;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (i1.Load(i, tag_table)) {
*new_tail = i->next;
return true;
}
return false;
}
};
template <typename I1, typename I2>
struct SequenceFields<I1, I2> : SequenceFields<I1> {
I2 i2;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i2.Load(ni, tag_table)) {
*new_tail = ni;
return true;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3>
struct SequenceFields<I1, I2, I3> : SequenceFields<I1, I2> {
I3 i3;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i3.Load(ni, tag_table)) {
*new_tail = ni;
return true;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3, typename I4>
struct SequenceFields<I1, I2, I3, I4> : SequenceFields<I1, I2, I3> {
I4 i4;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2, I3>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i4.Load(ni, tag_table)) {
*new_tail = ni;
return true;
}
}
return false;
}
};
template <typename I1, typename I2, typename I3, typename I4, typename I5>
struct SequenceFields<I1, I2, I3, I4, I5> : SequenceFields<I1, I2, I3, I4> {
I5 i5;
protected:
template <typename SEQ, typename... Ti> friend struct Sequence;
bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
if (SequenceFields<I1, I2, I3, I4>::Check(i, tag_table, new_tail)) {
auto ni = i->next;
if (ni && i5.Load(ni, tag_table)) {
*new_tail = ni;
return true;
}
}
return false;
}
};
template <typename SEQ, typename... Ti>
struct Sequence {
struct EmitArgs : SequenceFields<Ti...> {};
static bool Select(X64Emitter& e, const Instr* i, const Instr** new_tail) {
EmitArgs args;
TagTable tag_table;
if (!args.Check(i, tag_table, new_tail)) {
return false;
}
SEQ::Emit(e, args);
return true;
}
};
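// Editor note: Select is the per-instruction entry point the backend calls
// while walking a block. On a match, Emit receives operand wrappers that are
// already loaded (registers assigned, constants decoded) and *new_tail has
// been advanced past every instruction the sequence consumed, so
// multi-instruction matches splice themselves out of the stream.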
template <typename T>
const T GetTempReg(X64Emitter& e);
template <>
const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
return e.al;
}
template <>
const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
return e.ax;
}
template <>
const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
return e.eax;
}
template <>
const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
return e.rax;
}
template <typename SEQ, typename T>
struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
typedef T EmitArgType;
static const uint32_t head_key = T::key;
static void Emit(X64Emitter& e, const EmitArgs& _) {
SEQ::Emit(e, _.i1);
}
template <typename REG_FN>
static void EmitUnaryOp(
X64Emitter& e, const EmitArgType& i,
const REG_FN& reg_fn) {
if (i.src1.is_constant) {
e.mov(i.dest, i.src1.constant());
reg_fn(e, i.dest);
} else {
if (i.dest != i.src1) {
e.mov(i.dest, i.src1);
}
reg_fn(e, i.dest);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitCommutativeBinaryOp(
X64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
if (i.dest == i.src2) {
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
} else {
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
e.mov(temp, i.src1.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.mov(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2);
}
} else if (i.src2.is_constant) {
if (i.dest == i.src1) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.mov(i.dest, i.src2.constant());
reg_reg_fn(e, i.dest, i.src1);
}
} else {
if (i.dest == i.src1) {
reg_reg_fn(e, i.dest, i.src2);
} else if (i.dest == i.src2) {
reg_reg_fn(e, i.dest, i.src1);
} else {
e.mov(i.dest, i.src1);
reg_reg_fn(e, i.dest, i.src2);
}
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitAssociativeBinaryOp(
X64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
if (i.dest == i.src2) {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2);
e.mov(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, temp);
} else {
e.mov(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2);
}
} else if (i.src2.is_constant) {
if (i.dest == i.src1) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.mov(i.dest, i.src1);
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
}
} else {
if (i.dest == i.src1) {
reg_reg_fn(e, i.dest, i.src2);
} else if (i.dest == i.src2) {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2);
e.mov(i.dest, i.src1);
reg_reg_fn(e, i.dest, temp);
} else {
e.mov(i.dest, i.src1);
reg_reg_fn(e, i.dest, i.src2);
}
}
}
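// Editor note: despite the name, EmitAssociativeBinaryOp is the helper for
// non-commutative operations (sub, shifts, and friends). Operand order must
// be preserved, so when dest aliases src2 it shuffles through a scratch
// register instead of simply swapping the operands the way the commutative
// helper may.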
template <typename FN>
static void EmitCommutativeBinaryXmmOp(
X64Emitter& e, const EmitArgType& i, const FN& fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
e.LoadConstantXmm(e.xmm0, i.src1.constant());
fn(e, i.dest, e.xmm0, i.src2);
} else if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
fn(e, i.dest, i.src1, e.xmm0);
} else {
fn(e, i.dest, i.src1, i.src2);
}
}
template <typename FN>
static void EmitAssociativeBinaryXmmOp(
X64Emitter& e, const EmitArgType& i, const FN& fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
e.LoadConstantXmm(e.xmm0, i.src1.constant());
fn(e, i.dest, e.xmm0, i.src2);
} else if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
fn(e, i.dest, i.src1, e.xmm0);
} else {
fn(e, i.dest, i.src1, i.src2);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitCommutativeCompareOp(
X64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
} else {
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
e.mov(temp, i.src1.constant());
reg_reg_fn(e, i.src2, temp);
}
} else if (i.src2.is_constant) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2.constant());
reg_reg_fn(e, i.src1, temp);
}
} else {
reg_reg_fn(e, i.src1, i.src2);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitAssociativeCompareOp(
X64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
XEASSERT(!i.src2.is_constant);
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()), true);
} else {
auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
e.mov(temp, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2, temp, true);
}
} else if (i.src2.is_constant) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()), false);
} else {
auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
e.mov(temp, i.src2.constant());
reg_reg_fn(e, i.dest, i.src1, temp, false);
}
} else {
reg_reg_fn(e, i.dest, i.src1, i.src2, false);
}
}
};
static const int ANY = -1;
typedef int tag_t;
static const tag_t TAG0 = 0;
static const tag_t TAG1 = 1;
static const tag_t TAG2 = 2;
static const tag_t TAG3 = 3;
static const tag_t TAG4 = 4;
static const tag_t TAG5 = 5;
static const tag_t TAG6 = 6;
static const tag_t TAG7 = 7;
typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**);
template <typename T>
void Register() {
sequence_table.insert({ T::head_key, T::Select });
}
template <typename T, typename Tn, typename... Ts>
void Register() {
Register<T>();
Register<Tn, Ts...>();
};
#define EMITTER_OPCODE_TABLE(name, ...) \
void Register_##name() { \
Register<__VA_ARGS__>(); \
}
#define MATCH(...) __VA_ARGS__
#define EMITTER(name, match) struct name : SingleSequence<name, match>
#define SEQUENCE(name, match) struct name : Sequence<name, match>
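// Editor sketch of intended usage (hypothetical sequence, not one registered
// here; it assumes OPCODE_ADD from the hir opcode table): an i8 add emitter
// and its registration would look roughly like
//   EMITTER(ADD_I8, MATCH(I<OPCODE_ADD, I8<>, I8<>, I8<>>)) {
//     static void Emit(X64Emitter& e, const EmitArgType& i) {
//       EmitCommutativeBinaryOp(e, i,
//           [](X64Emitter& e, const Reg8& dest_src, const Reg8& src) {
//             e.add(dest_src, src);
//           },
//           [](X64Emitter& e, const Reg8& dest_src, int32_t constant) {
//             e.add(dest_src, constant);
//           });
//     }
//   };
//   EMITTER_OPCODE_TABLE(OPCODE_ADD, ADD_I8);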
} // namespace

File diff suppressed because it is too large.

View File

@ -2,32 +2,32 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef ALLOY_BACKEND_X64_X64_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_SEQUENCES_H_

#include <alloy/core.h>

XEDECLARECLASS2(alloy, hir, Instr);

namespace alloy {
namespace backend {
namespace x64 {

class X64Emitter;

void RegisterSequences();
bool SelectSequence(X64Emitter& e, const hir::Instr* i, const hir::Instr** new_tail);

}  // namespace x64
}  // namespace backend
}  // namespace alloy

#endif  // ALLOY_BACKEND_X64_X64_SEQUENCES_H_

View File

@ -0,0 +1,145 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <third_party/xbyak/xbyak/xbyak.h>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::backend::x64;
using namespace Xbyak;
X64ThunkEmitter::X64ThunkEmitter(
X64Backend* backend, XbyakAllocator* allocator) :
X64Emitter(backend, allocator) {
}
X64ThunkEmitter::~X64ThunkEmitter() {
}
HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// rcx = target
// rdx = arg0
// r8 = arg1
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address
mov(qword[rsp + 8 * 3], r8);
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
mov(qword[rsp + 48], rbx);
mov(qword[rsp + 56], rcx);
mov(qword[rsp + 64], rbp);
mov(qword[rsp + 72], rsi);
mov(qword[rsp + 80], rdi);
mov(qword[rsp + 88], r12);
mov(qword[rsp + 96], r13);
mov(qword[rsp + 104], r14);
mov(qword[rsp + 112], r15);
/*movaps(ptr[rsp + 128], xmm6);
movaps(ptr[rsp + 144], xmm7);
movaps(ptr[rsp + 160], xmm8);
movaps(ptr[rsp + 176], xmm9);
movaps(ptr[rsp + 192], xmm10);
movaps(ptr[rsp + 208], xmm11);
movaps(ptr[rsp + 224], xmm12);
movaps(ptr[rsp + 240], xmm13);
movaps(ptr[rsp + 256], xmm14);
movaps(ptr[rsp + 272], xmm15);*/
mov(rax, rcx);
mov(rcx, rdx);
mov(rdx, r8);
call(rax);
/*movaps(xmm6, ptr[rsp + 128]);
movaps(xmm7, ptr[rsp + 144]);
movaps(xmm8, ptr[rsp + 160]);
movaps(xmm9, ptr[rsp + 176]);
movaps(xmm10, ptr[rsp + 192]);
movaps(xmm11, ptr[rsp + 208]);
movaps(xmm12, ptr[rsp + 224]);
movaps(xmm13, ptr[rsp + 240]);
movaps(xmm14, ptr[rsp + 256]);
movaps(xmm15, ptr[rsp + 272]);*/
mov(rbx, qword[rsp + 48]);
mov(rcx, qword[rsp + 56]);
mov(rbp, qword[rsp + 64]);
mov(rsi, qword[rsp + 72]);
mov(rdi, qword[rsp + 80]);
mov(r12, qword[rsp + 88]);
mov(r13, qword[rsp + 96]);
mov(r14, qword[rsp + 104]);
mov(r15, qword[rsp + 112]);
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
mov(r8, qword[rsp + 8 * 3]);
ret();
void* fn = Emplace(stack_size);
return (HostToGuestThunk)fn;
}
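// Editor note: a host-side caller would use the thunk roughly like this
// (sketch; the exact HostToGuestThunk typedef lives in the backend header):
//   HostToGuestThunk thunk = thunk_emitter->EmitHostToGuestThunk();
//   thunk(generated_code_ptr, thread_state_context, 0);
// The thunk spills the Win64 callee-saved GPRs the generated code may
// clobber, shuffles target/arg0/arg1 into rax/rcx/rdx, and restores
// everything on return.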
GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// rcx = context
// rdx = target function
// r8 = arg0
// r9 = arg1
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
mov(qword[rsp + 48], rbx);
mov(qword[rsp + 56], rcx);
mov(qword[rsp + 64], rbp);
mov(qword[rsp + 72], rsi);
mov(qword[rsp + 80], rdi);
mov(qword[rsp + 88], r12);
mov(qword[rsp + 96], r13);
mov(qword[rsp + 104], r14);
mov(qword[rsp + 112], r15);
// TODO(benvanik): save things? XMM0-5?
mov(rax, rdx);
mov(rdx, r8);
mov(r8, r9);
call(rax);
mov(rbx, qword[rsp + 48]);
mov(rcx, qword[rsp + 56]);
mov(rbp, qword[rsp + 64]);
mov(rsi, qword[rsp + 72]);
mov(rdi, qword[rsp + 80]);
mov(r12, qword[rsp + 88]);
mov(r13, qword[rsp + 96]);
mov(r14, qword[rsp + 104]);
mov(r15, qword[rsp + 112]);
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
ret();
void* fn = Emplace(stack_size);
return (GuestToHostThunk)fn;
}

View File

@ -0,0 +1,147 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_X64_X64_THUNK_EMITTER_H_
#define XENIA_CPU_X64_X64_THUNK_EMITTER_H_
#include <alloy/core.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_emitter.h>
namespace alloy {
namespace backend {
namespace x64 {
/**
* Stack Layout
* ----------------------------
* NOTE: stack must always be 16b aligned.
*
* Thunk stack:
* +------------------+
* | arg temp, 3 * 8 | rsp + 0
* | |
* | |
* +------------------+
* | scratch, 16b | rsp + 32
* | |
* +------------------+
* | rbx | rsp + 48
* +------------------+
* | rcx / context | rsp + 56
* +------------------+
* | rbp | rsp + 64
* +------------------+
* | rsi | rsp + 72
* +------------------+
* | rdi | rsp + 80
* +------------------+
* | r12 | rsp + 88
* +------------------+
* | r13 | rsp + 96
* +------------------+
* | r14 | rsp + 104
* +------------------+
* | r15 | rsp + 112
* +------------------+
* | (return address) | rsp + 120
* +------------------+
* | (rcx home) | rsp + 128
* +------------------+
* | (rdx home) | rsp + 136
* +------------------+
*
*
* TODO:
* +------------------+
* | xmm6 | rsp + 128
* | |
* +------------------+
* | xmm7 | rsp + 144
* | |
* +------------------+
* | xmm8 | rsp + 160
* | |
* +------------------+
* | xmm9 | rsp + 176
* | |
* +------------------+
* | xmm10 | rsp + 192
* | |
* +------------------+
* | xmm11 | rsp + 208
* | |
* +------------------+
* | xmm12 | rsp + 224
* | |
* +------------------+
* | xmm13 | rsp + 240
* | |
* +------------------+
* | xmm14 | rsp + 256
* | |
* +------------------+
* | xmm15 | rsp + 272
* | |
* +------------------+
*
* Guest stack:
* +------------------+
* | arg temp, 3 * 8 | rsp + 0
* | |
* | |
* +------------------+
* | scratch, 32b | rsp + 32
* | |
* +------------------+
* | rcx / context | rsp + 64
* +------------------+
* | guest ret addr | rsp + 72
* +------------------+
* | call ret addr | rsp + 80
* +------------------+
* ... locals ...
* +------------------+
* | (return address) |
* +------------------+
*
*/
class StackLayout {
public:
const static size_t THUNK_STACK_SIZE = 120;
const static size_t GUEST_STACK_SIZE = 88;
const static size_t GUEST_RCX_HOME = 64;
const static size_t GUEST_RET_ADDR = 72;
const static size_t GUEST_CALL_RET_ADDR = 80;
};
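// Editor addition (sketch): both frame sizes leave rsp 16-byte aligned once
// the 8-byte return address already on the stack is counted, matching the
// alignment note at the top of the layout comment.
static_assert(StackLayout::THUNK_STACK_SIZE % 16 == 8,
              "thunk frame + return address must keep rsp 16-byte aligned");
static_assert(StackLayout::GUEST_STACK_SIZE % 16 == 8,
              "guest frame + return address must keep rsp 16-byte aligned");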
class X64ThunkEmitter : public X64Emitter {
public:
X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator);
virtual ~X64ThunkEmitter();
// Call a generated function, saving all stack parameters.
HostToGuestThunk EmitHostToGuestThunk();
// Function that guest code can call to transition into host code.
GuestToHostThunk EmitGuestToHostThunk();
};
} // namespace x64
} // namespace backend
} // namespace alloy
#endif // XENIA_CPU_X64_X64_THUNK_EMITTER_H_

View File

@ -0,0 +1,200 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/backend/x64/x64_tracers.h>
#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/thread_state.h>
using namespace alloy;
using namespace alloy::backend::x64;
using namespace alloy::runtime;
namespace alloy {
namespace backend {
namespace x64 {
#define ITRACE 0
#define DTRACE 0
#define TARGET_THREAD 1
#define IFLUSH() fflush(stdout)
#define IPRINT if (thread_state->thread_id() == TARGET_THREAD) printf
#define DFLUSH() fflush(stdout)
#define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf
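// Editor note: these macros expand to bare statements (DPRINT expands to two:
// the flush, then the conditional printf), so they are only safe when used as
// a complete statement on their own line - an unbraced `if (x) DPRINT(...)`
// would guard only the flush. Every call site below uses them that way.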
uint32_t GetTracingMode() {
uint32_t mode = 0;
#if ITRACE
mode |= TRACING_INSTR;
#endif // ITRACE
#if DTRACE
mode |= TRACING_DATA;
#endif // DTRACE
return mode;
}
void TraceString(void* raw_context, const char* str) {
auto thread_state = *((ThreadState**)raw_context);
IPRINT("XE[t] :%d: %s\n", thread_state->thread_id(), str);
IFLUSH();
}
void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = ctx i8 +%d\n", (int8_t)value, value, offset);
}
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = ctx i16 +%d\n", (int16_t)value, value, offset);
}
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = ctx i32 +%d\n", (int32_t)value, value, offset);
}
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%lld (%llX) = ctx i64 +%d\n", (int64_t)value, value, offset);
}
void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%e (%X) = ctx f32 +%d\n", value.m128_f32[0], value.m128_i32[0], offset);
}
void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
union {
double d;
uint64_t x;
} f;
f.x = value.m128_i64[0];
DPRINT("%lle (%llX) = ctx f64 +%d\n", f.d, value.m128_i64[0], offset);
}
void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = ctx v128 +%d\n",
value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
offset);
}
void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx i8 +%d = %d (%X)\n", offset, (int8_t)value, value);
}
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx i16 +%d = %d (%X)\n", offset, (int16_t)value, value);
}
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx i32 +%d = %d (%X)\n", offset, (int32_t)value, value);
}
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx i64 +%d = %lld (%llX)\n", offset, (int64_t)value, value);
}
void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx f32 +%d = %e (%X)\n", offset, value.m128_i32[0], value.m128_f32[0]);
}
void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
union {
double d;
uint64_t x;
} f;
f.x = value.m128_i64[0];
DPRINT("ctx f64 +%d = %lle (%llX)\n", offset, value.m128_i64[0], f.d);
}
void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", offset,
value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
}
void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = load.i8 %.8X\n", (int8_t)value, value, address);
}
void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = load.i16 %.8X\n", (int16_t)value, value, address);
}
void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%d (%X) = load.i32 %.8X\n", (int32_t)value, value, address);
}
void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%lld (%llX) = load.i64 %.8X\n", (int64_t)value, value, address);
}
void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("%e (%X) = load.f32 %.8X\n", value.m128_f32[0], value.m128_i32[0], address);
}
void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
union {
double d;
uint64_t x;
} f;
f.x = value.m128_i64[0];
DPRINT("%lle (%llX) = load.f64 %.8X\n", f.d, value.m128_i64[0], address);
}
void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
address);
}
void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.i8 %.8X = %d (%X)\n", address, (int8_t)value, value);
}
void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.i16 %.8X = %d (%X)\n", address, (int16_t)value, value);
}
void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.i32 %.8X = %d (%X)\n", address, (int32_t)value, value);
}
void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.i64 %.8X = %lld (%llX)\n", address, (int64_t)value, value);
}
void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.f32 %.8X = %e (%X)\n", address, value.m128_f32[0], value.m128_i32[0]);
}
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
union {
double d;
uint64_t x;
} f;
f.x = value.m128_i64[0];
DPRINT("store.f64 %.8X = %lle (%llX)\n", address, f.d, value.m128_i64[0]);
}
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
auto thread_state = *((ThreadState**)raw_context);
DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", address,
value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
}
} // namespace x64
} // namespace backend
} // namespace alloy

View File

@ -0,0 +1,85 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_BACKEND_X64_X64_TRACERS_H_
#define ALLOY_BACKEND_X64_X64_TRACERS_H_
#include <alloy/core.h>
#if XE_LIKE_WIN32
#include <xmmintrin.h>
#else
// Non-MSVC builds: emulate MSVC's __m128 with a GCC/Clang-style aligned union.
typedef union __attribute__((aligned(16))) __m128 {
float m128_f32[4];
uint64_t m128_u64[2];
int8_t m128_i8[16];
int16_t m128_i16[8];
int32_t m128_i32[4];
int64_t m128_i64[2];
uint8_t m128_u8[16];
uint16_t m128_u16[8];
uint32_t m128_u32[4];
} __m128;
#endif
namespace alloy {
namespace backend {
namespace x64 {
class X64Emitter;
enum TracingMode {
TRACING_INSTR = (1 << 1),
TRACING_DATA = (1 << 2),
};
uint32_t GetTracingMode();
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }
void TraceString(void* raw_context, const char* str);
void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value);
void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value);
void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value);
void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value);
void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value);
void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value);
void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value);
void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value);
void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value);
void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value);
void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value);
void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value);
void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value);
void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value);
void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);
} // namespace x64
} // namespace backend
} // namespace alloy
#endif // ALLOY_BACKEND_X64_X64_TRACERS_H_

View File

@ -20,6 +20,8 @@ using namespace alloy::runtime;
Compiler::Compiler(Runtime* runtime) :
    runtime_(runtime) {
  scratch_arena_ = new Arena();

  alloy::tracing::WriteEvent(EventType::Init({
  }));
}
@ -32,6 +34,8 @@ Compiler::~Compiler() {
    delete pass;
  }

  delete scratch_arena_;

  alloy::tracing::WriteEvent(EventType::Deinit({
  }));
}
@ -45,10 +49,13 @@ void Compiler::Reset() {
}

int Compiler::Compile(HIRBuilder* builder) {
  SCOPE_profile_cpu_f("alloy");

  // TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they
  // stop changing things, etc.
  for (auto it = passes_.begin(); it != passes_.end(); ++it) {
    CompilerPass* pass = *it;
    scratch_arena_->Reset();
    if (pass->Run(builder)) {
      return 1;
    }

View File

@ -28,6 +28,7 @@ public:
  ~Compiler();

  runtime::Runtime* runtime() const { return runtime_; }
  Arena* scratch_arena() const { return scratch_arena_; }

  void AddPass(CompilerPass* pass);
@ -37,6 +38,7 @@ public:
private:
  runtime::Runtime* runtime_;
  Arena* scratch_arena_;

  typedef std::vector<CompilerPass*> PassList;
  PassList passes_;

View File

@ -27,3 +27,7 @@ int CompilerPass::Initialize(Compiler* compiler) {
  compiler_ = compiler;
  return 0;
}

Arena* CompilerPass::scratch_arena() const {
  return compiler_->scratch_arena();
}

View File

@ -32,6 +32,9 @@ public:
  virtual int Run(hir::HIRBuilder* builder) = 0;

 protected:
  Arena* scratch_arena() const;

 protected:
  runtime::Runtime* runtime_;
  Compiler* compiler_;

View File

@ -11,11 +11,15 @@
#define ALLOY_COMPILER_COMPILER_PASSES_H_

#include <alloy/compiler/passes/constant_propagation_pass.h>
#include <alloy/compiler/passes/control_flow_analysis_pass.h>
#include <alloy/compiler/passes/context_promotion_pass.h>
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
#include <alloy/compiler/passes/dead_code_elimination_pass.h>
#include <alloy/compiler/passes/finalization_pass.h>
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
#include <alloy/compiler/passes/register_allocation_pass.h>
#include <alloy/compiler/passes/simplification_pass.h>
#include <alloy/compiler/passes/validation_pass.h>
#include <alloy/compiler/passes/value_reduction_pass.h>
// TODO: // TODO:
@ -134,5 +138,42 @@
// store_context +302, v5
// branch_true v5, ...
//
// - X86Canonicalization
// For various opcodes add copies/commute the arguments to match x86
// operand semantics. This makes code generation easier and if done
// before register allocation can prevent a lot of extra shuffling in
// the emitted code.
//
// Example:
// <block0>:
// v0 = ...
// v1 = ...
// v2 = add v0, v1 <-- v1 now unused
// Becomes:
// v0 = ...
// v1 = ...
// v1 = add v1, v0 <-- src1 = dest/src, so reuse for both
// by commuting and setting dest = src1
//
// - RegisterAllocation
// Given a machine description (register classes, counts) run over values
// and assign them to registers, adding spills as needed. It should be
// possible to directly emit code from this form.
//
// Example:
// <block0>:
// v0 = load_context +0
// v1 = load_context +1
// v0 = add v0, v1
// ...
// v2 = mul v0, v1
// Becomes:
// reg0 = load_context +0
// reg1 = load_context +1
// reg2 = add reg0, reg1
// store_local +123, reg2 <-- spill inserted
// ...
// reg0 = load_local +123 <-- load inserted
// reg0 = mul reg0, reg1
#endif  // ALLOY_COMPILER_COMPILER_PASSES_H_

View File

@ -9,6 +9,9 @@
#include <alloy/compiler/passes/constant_propagation_pass.h>

#include <alloy/runtime/function.h>
#include <alloy/runtime/runtime.h>

using namespace alloy;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
@ -23,6 +26,8 @@ ConstantPropagationPass::~ConstantPropagationPass() {
}

int ConstantPropagationPass::Run(HIRBuilder* builder) {
  SCOPE_profile_cpu_f("alloy");

  // Once ContextPromotion has run there will likely be a whole slew of
  // constants that can be pushed through the function.
  // Example:
@ -41,6 +46,14 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
  // v1 = add 1000, 1000
  // store_context +200, 2000
  // A DCE run after this should clean up any of the values no longer needed.
//
// Special care needs to be taken with paired instructions. For example,
// DID_CARRY needs to be set as a constant:
// v1 = sub.2 20, 1
// v2 = did_carry v1
// should become:
// v1 = 19
// v2 = 0
  Block* block = builder->first_block();
  while (block) {
@ -79,6 +92,17 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
          }
        }
        break;
case OPCODE_CALL_INDIRECT:
if (i->src1.value->IsConstant()) {
runtime::FunctionInfo* symbol_info;
if (runtime_->LookupFunctionInfo(
(uint32_t)i->src1.value->constant.i32, &symbol_info)) {
break;
}
i->Replace(&OPCODE_CALL_info, i->flags);
i->src1.symbol_info = symbol_info;
}
break;
      case OPCODE_CALL_INDIRECT_TRUE:
        if (i->src1.value->IsConstant()) {
          if (i->src1.value->IsConstantTrue()) {
@ -179,20 +203,112 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
        break;
      // TODO(benvanik): compares
case OPCODE_COMPARE_EQ:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantEQ(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_NE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantNE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SLT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSLT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SLE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSLE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SGT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSGT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SGE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSGE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_ULT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantULT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_ULE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantULE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_UGT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantUGT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_UGE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantUGE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_DID_CARRY:
XEASSERT(!i->src1.value->IsConstant());
break;
case OPCODE_DID_OVERFLOW:
XEASSERT(!i->src1.value->IsConstant());
break;
case OPCODE_DID_SATURATE:
XEASSERT(!i->src1.value->IsConstant());
break;
      case OPCODE_ADD:
        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
          v->set_from(i->src1.value);
          bool did_carry = v->Add(i->src2.value);
          bool propagate_carry = !!(i->flags & ARITHMETIC_SET_CARRY);
          i->Remove();
          // If carry is set find the DID_CARRY and fix it.
          if (propagate_carry) {
            PropagateCarry(v, did_carry);
          }
        }
        break;
      // TODO(benvanik): ADD_CARRY (w/ ARITHMETIC_SET_CARRY)
      case OPCODE_SUB:
        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
          v->set_from(i->src1.value);
          bool did_carry = v->Sub(i->src2.value);
          bool propagate_carry = !!(i->flags & ARITHMETIC_SET_CARRY);
          i->Remove();
          // If carry is set find the DID_CARRY and fix it.
          if (propagate_carry) {
            PropagateCarry(v, did_carry);
          }
        }
        break;
      case OPCODE_MUL:
@ -298,6 +414,13 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
          i->Remove();
        }
        break;
case OPCODE_CNTLZ:
if (i->src1.value->IsConstant()) {
v->set_zero(v->type);
v->CountLeadingZeros(i->src1.value);
i->Remove();
}
break;
      // TODO(benvanik): INSERT/EXTRACT
      // TODO(benvanik): SPLAT/PERMUTE/SWIZZLE
      case OPCODE_SPLAT:
@ -314,3 +437,16 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
  return 0;
}
void ConstantPropagationPass::PropagateCarry(hir::Value* v, bool did_carry) {
auto next = v->use_head;
while (next) {
auto use = next;
next = use->next;
if (use->instr->opcode == &OPCODE_DID_CARRY_info) {
// Replace carry value.
use->instr->dest->set_constant(did_carry ? 1 : 0);
use->instr->Remove();
}
}
}

View File

@ -26,6 +26,7 @@ public:
  virtual int Run(hir::HIRBuilder* builder);

 private:
  void PropagateCarry(hir::Value* v, bool did_carry);
};

View File

@ -9,6 +9,8 @@
#include <alloy/compiler/passes/context_promotion_pass.h>

#include <gflags/gflags.h>

#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>
@ -20,6 +22,10 @@ using namespace alloy::hir;
using namespace alloy::runtime;

DEFINE_bool(store_all_context_values, false,
    "Don't strip dead context stores to aid in debugging.");

ContextPromotionPass::ContextPromotionPass() :
    context_values_size_(0), context_values_(0),
    CompilerPass() {
@ -45,6 +51,8 @@ int ContextPromotionPass::Initialize(Compiler* compiler) {
}

int ContextPromotionPass::Run(HIRBuilder* builder) {
  SCOPE_profile_cpu_f("alloy");

  // Like mem2reg, but because context memory is unaliasable it's easier to
  // check and convert LoadContext/StoreContext into value operations.
  // Example of load->value promotion:
@ -69,11 +77,13 @@ int ContextPromotionPass::Run(HIRBuilder* builder) {
  }

  // Remove all dead stores.
  if (!FLAGS_store_all_context_values) {
    block = builder->first_block();
    while (block) {
      RemoveDeadStoresBlock(block);
      block = block->next;
    }
  }

  return 0;
}

View File

@ -0,0 +1,69 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/compiler/passes/control_flow_analysis_pass.h>
#include <alloy/backend/backend.h>
#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
using namespace alloy::frontend;
using namespace alloy::hir;
using namespace alloy::runtime;
ControlFlowAnalysisPass::ControlFlowAnalysisPass() :
CompilerPass() {
}
ControlFlowAnalysisPass::~ControlFlowAnalysisPass() {
}
int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// TODO(benvanik): reset edges for all blocks? Needed to be re-runnable.
// Add edges.
auto block = builder->first_block();
while (block) {
auto instr = block->instr_tail;
while (instr) {
if ((instr->opcode->flags & OPCODE_FLAG_BRANCH) == 0) {
break;
}
if (instr->opcode == &OPCODE_BRANCH_info) {
auto label = instr->src1.label;
builder->AddEdge(block, label->block, Edge::UNCONDITIONAL);
} else if (instr->opcode == &OPCODE_BRANCH_TRUE_info ||
instr->opcode == &OPCODE_BRANCH_FALSE_info) {
auto label = instr->src2.label;
builder->AddEdge(block, label->block, 0);
}
instr = instr->prev;
}
block = block->next;
}
// Mark dominators.
block = builder->first_block();
while (block) {
if (block->incoming_edge_head &&
!block->incoming_edge_head->incoming_next) {
block->incoming_edge_head->flags |= Edge::DOMINATES;
}
block = block->next;
}
return 0;
}
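// Editor note: the dominator marking above handles only the trivial case - a
// block whose sole incoming edge comes from a single predecessor is dominated
// by it. Computing the full dominator tree (e.g. the Cooper-Harvey-Kennedy
// iterative algorithm) is left for when a pass actually needs it.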

View File

@ -0,0 +1,37 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#define ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#include <alloy/compiler/compiler_pass.h>
namespace alloy {
namespace compiler {
namespace passes {
class ControlFlowAnalysisPass : public CompilerPass {
public:
ControlFlowAnalysisPass();
virtual ~ControlFlowAnalysisPass();
virtual int Run(hir::HIRBuilder* builder);
private:
};
} // namespace passes
} // namespace compiler
} // namespace alloy
#endif // ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_

View File

@ -0,0 +1,203 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
#include <alloy/backend/backend.h>
#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>
#pragma warning(push)
#pragma warning(disable : 4244)
#pragma warning(disable : 4267)
#include <llvm/ADT/BitVector.h>
#pragma warning(pop)
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
using namespace alloy::frontend;
using namespace alloy::hir;
using namespace alloy::runtime;
DataFlowAnalysisPass::DataFlowAnalysisPass() :
CompilerPass() {
}
DataFlowAnalysisPass::~DataFlowAnalysisPass() {
}
int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Linearize blocks so that we can detect cycles and propagate dependencies.
uint32_t block_count = LinearizeBlocks(builder);
// Analyze value flow and add locals as needed.
AnalyzeFlow(builder, block_count);
return 0;
}
uint32_t DataFlowAnalysisPass::LinearizeBlocks(HIRBuilder* builder) {
// TODO(benvanik): actually do this - we cheat now knowing that they are in
// sequential order.
uint32_t block_ordinal = 0;
auto block = builder->first_block();
while (block) {
block->ordinal = block_ordinal++;
block = block->next;
}
return block_ordinal;
}
void DataFlowAnalysisPass::AnalyzeFlow(HIRBuilder* builder,
uint32_t block_count) {
uint32_t max_value_estimate =
builder->max_value_ordinal() + 1 + block_count * 4;
// Stash for value map. We may want to maintain this during building.
auto arena = builder->arena();
Value** value_map = (Value**)arena->Alloc(
sizeof(Value*) * max_value_estimate);
// Allocate incoming bitvectors for use by blocks. We don't need outgoing
// because they are only used during the block iteration.
// Mapped by block ordinal.
// TODO(benvanik): cache this list, grow as needed, etc.
auto incoming_bitvectors = (llvm::BitVector**)arena->Alloc(
sizeof(llvm::BitVector*) * block_count);
for (auto n = 0u; n < block_count; n++) {
incoming_bitvectors[n] = new llvm::BitVector(max_value_estimate);
}
// Walk blocks in reverse and calculate incoming/outgoing values.
auto block = builder->last_block();
while (block) {
// Allocate bitsets based on max value number.
block->incoming_values = incoming_bitvectors[block->ordinal];
auto& incoming_values = *block->incoming_values;
// Walk instructions and gather up incoming values.
auto instr = block->instr_head;
while (instr) {
uint32_t signature = instr->opcode->signature;
#define SET_INCOMING_VALUE(v) \
if (v->def && v->def->block != block) { \
incoming_values.set(v->ordinal); \
} \
XEASSERT(v->ordinal < max_value_estimate); \
value_map[v->ordinal] = v;
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
SET_INCOMING_VALUE(instr->src1.value);
}
if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
SET_INCOMING_VALUE(instr->src2.value);
}
if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
SET_INCOMING_VALUE(instr->src3.value);
}
#undef SET_INCOMING_VALUE
instr = instr->next;
}
// Add all successor incoming values to our outgoing, as we need to
// pass them through.
llvm::BitVector outgoing_values(max_value_estimate);
auto outgoing_edge = block->outgoing_edge_head;
while (outgoing_edge) {
if (outgoing_edge->dest->ordinal > block->ordinal) {
outgoing_values |= *outgoing_edge->dest->incoming_values;
}
outgoing_edge = outgoing_edge->outgoing_next;
}
incoming_values |= outgoing_values;
// Add stores for all outgoing values.
auto outgoing_ordinal = outgoing_values.find_first();
while (outgoing_ordinal != -1) {
Value* src_value = value_map[outgoing_ordinal];
XEASSERTNOTNULL(src_value);
if (!src_value->local_slot) {
src_value->local_slot = builder->AllocLocal(src_value->type);
}
builder->StoreLocal(src_value->local_slot, src_value);
// If we are in the block the value was defined in:
if (src_value->def->block == block) {
// Move the store to right after the def, or as soon after
// as we can (respecting PAIRED flags).
auto def_next = src_value->def->next;
while (def_next && def_next->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
def_next = def_next->next;
}
XEASSERTNOTNULL(def_next);
builder->last_instr()->MoveBefore(def_next);
// We don't need it in the incoming list.
incoming_values.reset(outgoing_ordinal);
} else {
// Eh, just throw at the end, before the first branch.
auto tail = block->instr_tail;
while (tail && tail->opcode->flags & OPCODE_FLAG_BRANCH) {
tail = tail->prev;
}
XEASSERTNOTZERO(tail);
builder->last_instr()->MoveBefore(tail->next);
}
outgoing_ordinal = outgoing_values.find_next(outgoing_ordinal);
}
// Add loads for all incoming values and rename them in the block.
auto incoming_ordinal = incoming_values.find_first();
while (incoming_ordinal != -1) {
Value* src_value = value_map[incoming_ordinal];
XEASSERTNOTNULL(src_value);
if (!src_value->local_slot) {
src_value->local_slot = builder->AllocLocal(src_value->type);
}
Value* local_value = builder->LoadLocal(src_value->local_slot);
builder->last_instr()->MoveBefore(block->instr_head);
// Swap uses of original value with the local value.
auto instr = block->instr_head;
while (instr) {
uint32_t signature = instr->opcode->signature;
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src1.value == src_value) {
instr->set_src1(local_value);
}
}
if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src2.value == src_value) {
instr->set_src2(local_value);
}
}
if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src3.value == src_value) {
instr->set_src3(local_value);
}
}
instr = instr->next;
}
incoming_ordinal = incoming_values.find_next(incoming_ordinal);
}
block = block->prev;
}
// Cleanup bitvectors.
for (auto n = 0u; n < block_count; n++) {
delete incoming_bitvectors[n];
}
}
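The sweep above reduces to: visit blocks in reverse, union each forward successor's incoming set into an outgoing set, and turn every boundary-crossing value into a StoreLocal/LoadLocal pair. A minimal standalone sketch of just the set propagation, with std::bitset standing in for llvm::BitVector (Block and its fields here are illustrative stand-ins, not the real HIR types):
#include <bitset>
#include <cstdint>
#include <vector>
constexpr size_t kMaxValues = 64;
struct Block {
  uint32_t ordinal;
  std::bitset<kMaxValues> incoming;  // pre-seeded: values used here, defined elsewhere
  std::bitset<kMaxValues> defined;   // values this block defines
  std::vector<Block*> successors;
};
void PropagateIncoming(std::vector<Block*>& linearized) {
  // Reverse order: every forward successor already knows its incoming set
  // by the time we visit a block.
  for (auto it = linearized.rbegin(); it != linearized.rend(); ++it) {
    Block* block = *it;
    std::bitset<kMaxValues> outgoing;
    for (Block* succ : block->successors) {
      if (succ->ordinal > block->ordinal) {  // forward edges only, as above
        outgoing |= succ->incoming;
      }
    }
    // Whatever successors need is either defined here (store after the def)
    // or passes through (load at entry, store at exit).
    block->incoming |= outgoing & ~block->defined;
  }
}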

View File

@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_
#define ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_
#include <alloy/compiler/compiler_pass.h>
namespace alloy {
namespace compiler {
namespace passes {
class DataFlowAnalysisPass : public CompilerPass {
public:
DataFlowAnalysisPass();
virtual ~DataFlowAnalysisPass();
virtual int Run(hir::HIRBuilder* builder);
private:
uint32_t LinearizeBlocks(hir::HIRBuilder* builder);
void AnalyzeFlow(hir::HIRBuilder* builder, uint32_t block_count);
};
} // namespace passes
} // namespace compiler
} // namespace alloy
#endif // ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_

View File

@ -23,6 +23,8 @@ DeadCodeEliminationPass::~DeadCodeEliminationPass() {
} }
int DeadCodeEliminationPass::Run(HIRBuilder* builder) { int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// ContextPromotion/DSE will likely leave around a lot of dead statements. // ContextPromotion/DSE will likely leave around a lot of dead statements.
// Code generated for comparison/testing produces many unused statements and // Code generated for comparison/testing produces many unused statements and
// with proper use analysis it should be possible to remove most of them: // with proper use analysis it should be possible to remove most of them:
@ -59,20 +61,21 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
// all removed ops with NOP and then do a single pass that removes them // all removed ops with NOP and then do a single pass that removes them
// all. // all.
bool any_removed = false; bool any_instr_removed = false;
bool any_locals_removed = false;
Block* block = builder->first_block(); Block* block = builder->first_block();
while (block) { while (block) {
// Walk instructions in reverse.
Instr* i = block->instr_tail; Instr* i = block->instr_tail;
while (i) { while (i) {
Instr* prev = i->prev; auto prev = i->prev;
const OpcodeInfo* opcode = i->opcode; auto opcode = i->opcode;
uint32_t signature = opcode->signature;
if (!(opcode->flags & OPCODE_FLAG_VOLATILE) && if (!(opcode->flags & OPCODE_FLAG_VOLATILE) &&
i->dest && !i->dest->use_head) { i->dest && !i->dest->use_head) {
// Has no uses and is not volatile. This instruction can die! // Has no uses and is not volatile. This instruction can die!
MakeNopRecursive(i); MakeNopRecursive(i);
any_removed = true; any_instr_removed = true;
} else if (opcode == &OPCODE_ASSIGN_info) { } else if (opcode == &OPCODE_ASSIGN_info) {
// Assignment. These are useless, so just try to remove by completely // Assignment. These are useless, so just try to remove by completely
// replacing the value. // replacing the value.
@ -82,11 +85,31 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
i = prev; i = prev;
} }
// Walk instructions forward.
i = block->instr_head;
while (i) {
auto next = i->next;
auto opcode = i->opcode;
if (opcode == &OPCODE_STORE_LOCAL_info) {
// Check to see if the store has any intervening uses after the load.
// If not, it can be removed (as the local is just passing through the
// function).
// We do this after the previous pass so that removed code doesn't keep
// the local alive.
if (!CheckLocalUse(i)) {
any_locals_removed = true;
}
}
i = next;
}
block = block->next; block = block->next;
} }
// Remove all nops. // Remove all nops.
if (any_removed) { if (any_instr_removed) {
Block* block = builder->first_block(); Block* block = builder->first_block();
while (block) { while (block) {
Instr* i = block->instr_head; Instr* i = block->instr_head;
@ -102,6 +125,21 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
} }
} }
// Remove any locals that no longer have uses.
if (any_locals_removed) {
// TODO(benvanik): local removal/dealloc.
auto& locals = builder->locals();
for (auto it = locals.begin(); it != locals.end();) {
auto value = *it;
if (!value->use_head) {
// Unused, can be removed.
it = locals.erase(it);
} else {
++it;
}
}
}
return 0; return 0;
} }
@ -150,3 +188,24 @@ void DeadCodeEliminationPass::ReplaceAssignment(Instr* i) {
i->Remove(); i->Remove();
} }
bool DeadCodeEliminationPass::CheckLocalUse(Instr* i) {
auto slot = i->src1.value;
auto src = i->src2.value;
auto use = src->use_head;
if (use) {
auto use_instr = use->instr;
if (use_instr->opcode != &OPCODE_LOAD_LOCAL_info) {
// A valid use (probably). Keep it.
return true;
}
// Load/store are paired. They can both be removed.
use_instr->Remove();
}
i->Remove();
return false;
}

View File

@ -28,6 +28,7 @@ public:
private: private:
void MakeNopRecursive(hir::Instr* i); void MakeNopRecursive(hir::Instr* i);
void ReplaceAssignment(hir::Instr* i); void ReplaceAssignment(hir::Instr* i);
bool CheckLocalUse(hir::Instr* i);
}; };

View File

@ -30,6 +30,8 @@ FinalizationPass::~FinalizationPass() {
} }
int FinalizationPass::Run(HIRBuilder* builder) { int FinalizationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Process the HIR and prepare it for lowering. // Process the HIR and prepare it for lowering.
// After this is done the HIR should be ready for emitting. // After this is done the HIR should be ready for emitting.
@ -44,9 +46,9 @@ int FinalizationPass::Run(HIRBuilder* builder) {
auto label = block->label_head; auto label = block->label_head;
while (label) { while (label) {
if (!label->name) { if (!label->name) {
char* name = (char*)arena->Alloc(6 + 4 + 1); const size_t label_len = 6 + 4 + 1;
xestrcpya(name, 6 + 1, "_label"); char* name = (char*)arena->Alloc(label_len);
char* part = _itoa(label->id, name + 6, 10); xesnprintfa(name, label_len, "_label%d", label->id);
label->name = name; label->name = name;
} }
label = label->next; label = label->next;
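For reference, the replacement sizes the buffer as 6 + 4 + 1: six bytes for "_label", four for decimal digits, and one for the NUL. Assuming xesnprintfa behaves like standard snprintf, a label id past 9999 gets clipped rather than overrunning the arena allocation. A hypothetical standalone equivalent:
#include <cstdio>
void MakeLabelName(int id, char out[6 + 4 + 1]) {
  // Writes e.g. "_label42"; oversized ids truncate, they never overflow.
  std::snprintf(out, 6 + 4 + 1, "_label%d", id);
}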

View File

@ -0,0 +1,539 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/compiler/passes/register_allocation_pass.h>
#include <algorithm>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
using namespace alloy::hir;
#define ASSERT_NO_CYCLES 0
RegisterAllocationPass::RegisterAllocationPass(
const MachineInfo* machine_info) :
machine_info_(machine_info),
CompilerPass() {
// Initialize register sets.
// TODO(benvanik): rewrite in a way that makes sense - this is terrible.
auto mi_sets = machine_info->register_sets;
xe_zero_struct(&usage_sets_, sizeof(usage_sets_));
uint32_t n = 0;
while (mi_sets[n].count) {
auto& mi_set = mi_sets[n];
auto usage_set = new RegisterSetUsage();
usage_sets_.all_sets[n] = usage_set;
usage_set->count = mi_set.count;
usage_set->set = &mi_set;
if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
usage_sets_.int_set = usage_set;
}
if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
usage_sets_.float_set = usage_set;
}
if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
usage_sets_.vec_set = usage_set;
}
n++;
}
}
RegisterAllocationPass::~RegisterAllocationPass() {
for (size_t n = 0; n < XECOUNT(usage_sets_.all_sets); n++) {
if (!usage_sets_.all_sets[n]) {
break;
}
delete usage_sets_.all_sets[n];
}
}
int RegisterAllocationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Simple per-block allocator that operates on SSA form.
// Registers do not move across blocks, though this could be
// optimized with some intra-block analysis (dominators/etc).
// Really, it'd just be nice to have someone who knew what they
// were doing lower SSA and do this right.
uint32_t block_ordinal = 0;
uint32_t instr_ordinal = 0;
auto block = builder->first_block();
while (block) {
// Sequential block ordinals.
block->ordinal = block_ordinal++;
// Reset all state.
PrepareBlockState();
// Renumber all instructions in the block. This is required so that
// we can sort the usage pointers below.
auto instr = block->instr_head;
while (instr) {
// Sequential global instruction ordinals.
instr->ordinal = instr_ordinal++;
instr = instr->next;
}
instr = block->instr_head;
while (instr) {
const OpcodeInfo* info = instr->opcode;
uint32_t signature = info->signature;
// Update the register use heaps.
AdvanceUses(instr);
// Check sources for retirement. If any are unused after this instruction
// we can eagerly evict them to speed up register allocation.
// Since X64 (and other platforms) can often take advantage of dest==src1
// register mappings we track retired src1 so that we can attempt to
// reuse it.
// NOTE: these checks require that the usage list be sorted!
bool has_preferred_reg = false;
RegAssignment preferred_reg = { 0 };
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V &&
!instr->src1.value->IsConstant()) {
if (!instr->src1_use->next) {
// Pull off preferred register. We will try to reuse this for the
// dest.
has_preferred_reg = true;
preferred_reg = instr->src1.value->reg;
XEASSERTNOTNULL(preferred_reg.set);
}
}
if (GET_OPCODE_SIG_TYPE_DEST(signature) == OPCODE_SIG_TYPE_V) {
// Must not have been set already.
XEASSERTNULL(instr->dest->reg.set);
// Sort the usage list. We depend on this in future uses of this variable.
SortUsageList(instr->dest);
// If we have a preferred register, use that.
// This way we can help along the stupid X86 two-operand instruction forms.
bool allocated;
if (has_preferred_reg) {
// Allocate with the given preferred register. If the register is in
// the wrong set it will not be reused.
allocated = TryAllocateRegister(instr->dest, preferred_reg);
} else {
// Allocate a register. This will either reserve a free one or
// spill and reuse an active one.
allocated = TryAllocateRegister(instr->dest);
}
if (!allocated) {
// Failed to allocate register -- need to spill and try again.
// We spill only those registers we aren't using.
if (!SpillOneRegister(builder, instr->dest->type)) {
// Unable to spill anything - this shouldn't happen.
XELOGE("Unable to spill any registers");
XEASSERTALWAYS();
return 1;
}
// Demand allocation.
if (!TryAllocateRegister(instr->dest)) {
// Boned.
XELOGE("Register allocation failed");
XEASSERTALWAYS();
return 1;
}
}
}
instr = instr->next;
}
block = block->next;
}
return 0;
}
void RegisterAllocationPass::DumpUsage(const char* name) {
#if 0
fprintf(stdout, "\n%s:\n", name);
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
auto usage_set = usage_sets_.all_sets[i];
if (usage_set) {
fprintf(stdout, "set %s:\n", usage_set->set->name);
fprintf(stdout, " avail: %s\n", usage_set->availability.to_string().c_str());
fprintf(stdout, " upcoming uses:\n");
for (auto it = usage_set->upcoming_uses.begin();
it != usage_set->upcoming_uses.end(); ++it) {
fprintf(stdout, " v%d, used at %d\n",
it->value->ordinal,
it->use->instr->ordinal);
}
}
}
fflush(stdout);
#endif
}
void RegisterAllocationPass::PrepareBlockState() {
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
auto usage_set = usage_sets_.all_sets[i];
if (usage_set) {
usage_set->availability.set();
usage_set->upcoming_uses.clear();
}
}
DumpUsage("PrepareBlockState");
}
void RegisterAllocationPass::AdvanceUses(Instr* instr) {
for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
auto usage_set = usage_sets_.all_sets[i];
if (!usage_set) {
break;
}
auto& upcoming_uses = usage_set->upcoming_uses;
for (auto it = upcoming_uses.begin(); it != upcoming_uses.end();) {
if (!it->use) {
// No uses at all - we can remove right away.
// This comes up from instructions where the dest is never used,
// like the ATOMIC ops.
MarkRegAvailable(it->value->reg);
it = upcoming_uses.erase(it);
continue;
}
if (it->use->instr != instr) {
// Not yet at this instruction.
++it;
continue;
}
// The use is from this instruction.
if (!it->use->next) {
// Last use of the value. We can retire it now.
MarkRegAvailable(it->value->reg);
it = upcoming_uses.erase(it);
} else {
// Used again. Push back the next use.
// Note that the value may be used multiple times by this instruction,
// so skip over those.
auto next_use = it->use->next;
while (next_use->next && next_use->instr == instr) {
next_use = next_use->next;
}
// Replace the entry: erase it and requeue it at the next use.
auto value = it->value;
it = upcoming_uses.erase(it);
upcoming_uses.emplace_back(value, next_use);
}
}
}
DumpUsage("AdvanceUses");
}
bool RegisterAllocationPass::IsRegInUse(const RegAssignment& reg) {
RegisterSetUsage* usage_set;
if (reg.set == usage_sets_.int_set->set) {
usage_set = usage_sets_.int_set;
} else if (reg.set == usage_sets_.float_set->set) {
usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
return !usage_set->availability.test(reg.index);
}
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegUsed(const RegAssignment& reg,
Value* value, Value::Use* use) {
auto usage_set = RegisterSetForValue(value);
usage_set->availability.set(reg.index, false);
usage_set->upcoming_uses.emplace_back(value, use);
DumpUsage("MarkRegUsed");
return usage_set;
}
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegAvailable(const hir::RegAssignment& reg) {
RegisterSetUsage* usage_set;
if (reg.set == usage_sets_.int_set->set) {
usage_set = usage_sets_.int_set;
} else if (reg.set == usage_sets_.float_set->set) {
usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
usage_set->availability.set(reg.index, true);
return usage_set;
}
bool RegisterAllocationPass::TryAllocateRegister(
Value* value, const RegAssignment& preferred_reg) {
// If the preferred register matches type and is available, use it.
auto usage_set = RegisterSetForValue(value);
if (usage_set->set == preferred_reg.set) {
// Check if available.
if (!IsRegInUse(preferred_reg)) {
// Mark as in-use and return. Best case.
MarkRegUsed(preferred_reg, value, value->use_head);
value->reg = preferred_reg;
return true;
}
}
// Otherwise, fallback to allocating like normal.
return TryAllocateRegister(value);
}
bool RegisterAllocationPass::TryAllocateRegister(Value* value) {
// Get the set this register is in.
RegisterSetUsage* usage_set = RegisterSetForValue(value);
// Find the first free register, if any.
// We have to ensure it's a valid one (in our count).
unsigned long first_unused = 0;
bool all_used = _BitScanForward(&first_unused, usage_set->availability.to_ulong()) == 0;
if (!all_used && first_unused < usage_set->count) {
// Available! Use it!
value->reg.set = usage_set->set;
value->reg.index = first_unused;
MarkRegUsed(value->reg, value, value->use_head);
return true;
}
// None available! Spill required.
return false;
}
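// The _BitScanForward probe above is MSVC-specific. A portable C++20 sketch
// of the same first-free-register scan (illustrative only, not wired in;
// std::countr_zero comes from <bit>):
int FirstFreeRegister(const std::bitset<32>& availability, uint32_t count) {
  uint32_t bits = static_cast<uint32_t>(availability.to_ulong());
  if (!bits) {
    return -1;  // every register in the set is in use
  }
  uint32_t index = static_cast<uint32_t>(std::countr_zero(bits));
  // The set may expose fewer than 32 registers; indices past count are not
  // real registers.
  return index < count ? static_cast<int>(index) : -1;
}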
bool RegisterAllocationPass::SpillOneRegister(
HIRBuilder* builder, TypeName required_type) {
// Get the set that we will be picking from.
RegisterSetUsage* usage_set;
if (required_type <= INT64_TYPE) {
usage_set = usage_sets_.int_set;
} else if (required_type <= FLOAT64_TYPE) {
usage_set = usage_sets_.float_set;
} else {
usage_set = usage_sets_.vec_set;
}
DumpUsage("SpillOneRegister (pre)");
// Pick the one with the furthest next use.
XEASSERT(!usage_set->upcoming_uses.empty());
auto furthest_usage = std::max_element(
usage_set->upcoming_uses.begin(), usage_set->upcoming_uses.end(),
RegisterUsage::Comparer());
Value* spill_value = furthest_usage->value;
Value::Use* prev_use = furthest_usage->use->prev;
Value::Use* next_use = furthest_usage->use;
XEASSERTNOTNULL(next_use);
usage_set->upcoming_uses.erase(furthest_usage);
DumpUsage("SpillOneRegister (post)");
const auto reg = spill_value->reg;
// We know the spill_value use list is sorted, so we can cut it right now.
// This makes it easier down below.
auto new_head_use = next_use;
// Allocate local.
if (spill_value->local_slot) {
// Value is already assigned a slot. Since we allocate in order and this is
// all SSA we know the stored value will be exactly what we want. Yay,
// we can prevent the redundant store!
// In fact, we may even want to pin this spilled value so that we always
// use the spilled value and prevent the need for more locals.
} else {
// Allocate a local slot.
spill_value->local_slot = builder->AllocLocal(spill_value->type);
// Add store.
builder->StoreLocal(spill_value->local_slot, spill_value);
auto spill_store = builder->last_instr();
auto spill_store_use = spill_store->src2_use;
XEASSERTNULL(spill_store_use->prev);
if (prev_use && prev_use->instr->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
// Instruction is paired. This is bad. We will insert the spill after the
// paired instruction.
XEASSERTNOTNULL(prev_use->instr->next);
spill_store->MoveBefore(prev_use->instr->next);
// Update last use.
spill_value->last_use = spill_store;
} else if (prev_use) {
// We insert the store immediately before the previous use.
// If we were smarter we could then re-run allocation and reuse the register
// once dropped.
spill_store->MoveBefore(prev_use->instr);
// Update last use.
spill_value->last_use = prev_use->instr;
} else {
// This is the first use, so the only thing we have is the define.
// Move the store to right after that.
spill_store->MoveBefore(spill_value->def->next);
// Update last use.
spill_value->last_use = spill_store;
}
}
#if ASSERT_NO_CYCLES
builder->AssertNoCycles();
spill_value->def->block->AssertNoCycles();
#endif // ASSERT_NO_CYCLES
// Add load.
// Inserted immediately before the next use. Since by definition the next
// use is after the instruction requesting the spill we know we haven't
// done allocation for that code yet and can let that be handled
// automatically when we get to it.
auto new_value = builder->LoadLocal(spill_value->local_slot);
auto spill_load = builder->last_instr();
spill_load->MoveBefore(next_use->instr);
// Note: implicit first use added.
#if ASSERT_NO_CYCLES
builder->AssertNoCycles();
spill_value->def->block->AssertNoCycles();
#endif // ASSERT_NO_CYCLES
// Set the local slot of the new value to our existing one. This way we will
// reuse that same memory if needed.
new_value->local_slot = spill_value->local_slot;
// Rename all future uses of the SSA value to the new value as loaded
// from the local.
// We can quickly do this by walking the use list. Because the list is
// already sorted we know we are going to end up with a sorted list.
auto walk_use = new_head_use;
auto new_use_tail = walk_use;
while (walk_use) {
auto next_walk_use = walk_use->next;
auto instr = walk_use->instr;
uint32_t signature = instr->opcode->signature;
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src1.value == spill_value) {
instr->set_src1(new_value);
}
}
if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src2.value == spill_value) {
instr->set_src2(new_value);
}
}
if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
if (instr->src3.value == spill_value) {
instr->set_src3(new_value);
}
}
walk_use = next_walk_use;
if (walk_use) {
new_use_tail = walk_use;
}
}
new_value->last_use = new_use_tail->instr;
// Update tracking.
MarkRegAvailable(reg);
return true;
}
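// Choosing the upcoming use with the greatest instruction ordinal makes this
// the classic furthest-next-use (Belady) spill heuristic. The core of the
// choice, reduced to stand-in types (illustrative, not wired into the pass;
// assumes a non-empty candidate list, mirroring the XEASSERT above):
namespace {
struct SpillCandidate {
  int value_id;
  int next_use_ordinal;  // ordinal of the instruction that next reads it
};
int PickSpillVictim(const std::vector<SpillCandidate>& upcoming) {
  // The best victim is the value we will not need for the longest time.
  auto it = std::max_element(
      upcoming.begin(), upcoming.end(),
      [](const SpillCandidate& a, const SpillCandidate& b) {
        return a.next_use_ordinal < b.next_use_ordinal;
      });
  return it->value_id;
}
}  // namespace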
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::RegisterSetForValue(
const Value* value) {
if (value->type <= INT64_TYPE) {
return usage_sets_.int_set;
} else if (value->type <= FLOAT64_TYPE) {
return usage_sets_.float_set;
} else {
return usage_sets_.vec_set;
}
}
namespace {
int CompareValueUse(const Value::Use* a, const Value::Use* b) {
return a->instr->ordinal - b->instr->ordinal;
}
} // namespace
void RegisterAllocationPass::SortUsageList(Value* value) {
// Modified in-place linked list sort from:
// http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
if (!value->use_head) {
return;
}
Value::Use* head = value->use_head;
Value::Use* tail = nullptr;
int insize = 1;
while (true) {
auto p = head;
head = nullptr;
tail = nullptr;
// count number of merges we do in this pass
int nmerges = 0;
while (p) {
// there exists a merge to be done
nmerges++;
// step 'insize' places along from p
auto q = p;
int psize = 0;
for (int i = 0; i < insize; i++) {
psize++;
q = q->next;
if (!q) break;
}
// if q hasn't fallen off end, we have two lists to merge
int qsize = insize;
// now we have two lists; merge them
while (psize > 0 || (qsize > 0 && q)) {
// decide whether next element of merge comes from p or q
Value::Use* e = nullptr;
if (psize == 0) {
// p is empty; e must come from q
e = q; q = q->next; qsize--;
} else if (qsize == 0 || !q) {
// q is empty; e must come from p
e = p; p = p->next; psize--;
} else if (CompareValueUse(p, q) <= 0) {
// First element of p is lower (or same); e must come from p
e = p; p = p->next; psize--;
} else {
// First element of q is lower; e must come from q
e = q; q = q->next; qsize--;
}
// add the next element to the merged list
if (tail) {
tail->next = e;
} else {
head = e;
}
// Maintain reverse pointers in a doubly linked list.
e->prev = tail;
tail = e;
}
// now p has stepped 'insize' places along, and q has too
p = q;
}
if (tail) {
tail->next = nullptr;
}
// If we have done only one merge, we're finished
if (nmerges <= 1) {
// allow for nmerges==0, the empty list case
break;
}
// Otherwise repeat, merging lists twice the size
insize *= 2;
}
value->use_head = head;
value->last_use = tail->instr;
}

View File

@ -0,0 +1,89 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
#include <algorithm>
#include <bitset>
#include <vector>
#include <alloy/backend/machine_info.h>
#include <alloy/compiler/compiler_pass.h>
namespace alloy {
namespace compiler {
namespace passes {
class RegisterAllocationPass : public CompilerPass {
public:
RegisterAllocationPass(const backend::MachineInfo* machine_info);
virtual ~RegisterAllocationPass();
virtual int Run(hir::HIRBuilder* builder);
private:
// TODO(benvanik): rewrite all this set shit -- too much indirection, the
// complexity is not needed.
struct RegisterUsage {
hir::Value* value;
hir::Value::Use* use;
RegisterUsage() : value(nullptr), use(nullptr) {}
RegisterUsage(hir::Value* value_, hir::Value::Use* use_)
: value(value_), use(use_) {}
struct Comparer : std::binary_function<RegisterUsage, RegisterUsage, bool> {
bool operator()(const RegisterUsage& a, const RegisterUsage& b) const {
return a.use->instr->ordinal < b.use->instr->ordinal;
}
};
};
struct RegisterSetUsage {
const backend::MachineInfo::RegisterSet* set = nullptr;
uint32_t count = 0;
std::bitset<32> availability = 0;
// TODO(benvanik): another data type.
std::vector<RegisterUsage> upcoming_uses;
};
void DumpUsage(const char* name);
void PrepareBlockState();
void AdvanceUses(hir::Instr* instr);
bool IsRegInUse(const hir::RegAssignment& reg);
RegisterSetUsage* MarkRegUsed(const hir::RegAssignment& reg,
hir::Value* value, hir::Value::Use* use);
RegisterSetUsage* MarkRegAvailable(const hir::RegAssignment& reg);
bool TryAllocateRegister(hir::Value* value,
const hir::RegAssignment& preferred_reg);
bool TryAllocateRegister(hir::Value* value);
bool SpillOneRegister(hir::HIRBuilder* builder, hir::TypeName required_type);
RegisterSetUsage* RegisterSetForValue(const hir::Value* value);
void SortUsageList(hir::Value* value);
private:
const backend::MachineInfo* machine_info_;
struct {
RegisterSetUsage* int_set = nullptr;
RegisterSetUsage* float_set = nullptr;
RegisterSetUsage* vec_set = nullptr;
RegisterSetUsage* all_sets[3];
} usage_sets_;
};
} // namespace passes
} // namespace compiler
} // namespace alloy
#endif // ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
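One constraint worth noting in RegisterSetUsage: availability is a std::bitset<32>, so a set can describe at most 32 physical registers — comfortable for x64's 16 GPRs and 16 XMMs, but a hard cap all the same. A cheap guard that could sit in the constructor loop shown earlier (hypothetical, not in the tree):
// Alongside usage_set->count = mi_set.count;
XEASSERT(mi_set.count <= 32);  // availability bitset is only 32 bits wide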

View File

@ -23,6 +23,8 @@ SimplificationPass::~SimplificationPass() {
} }
int SimplificationPass::Run(HIRBuilder* builder) { int SimplificationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
EliminateConversions(builder); EliminateConversions(builder);
SimplifyAssignments(builder); SimplifyAssignments(builder);
return 0; return 0;

View File

@ -5,14 +5,22 @@
'constant_propagation_pass.h', 'constant_propagation_pass.h',
'context_promotion_pass.cc', 'context_promotion_pass.cc',
'context_promotion_pass.h', 'context_promotion_pass.h',
'control_flow_analysis_pass.cc',
'control_flow_analysis_pass.h',
'data_flow_analysis_pass.cc',
'data_flow_analysis_pass.h',
'dead_code_elimination_pass.cc', 'dead_code_elimination_pass.cc',
'dead_code_elimination_pass.h', 'dead_code_elimination_pass.h',
'finalization_pass.cc', 'finalization_pass.cc',
'finalization_pass.h', 'finalization_pass.h',
#'dead_store_elimination_pass.cc', #'dead_store_elimination_pass.cc',
#'dead_store_elimination_pass.h', #'dead_store_elimination_pass.h',
'register_allocation_pass.cc',
'register_allocation_pass.h',
'simplification_pass.cc', 'simplification_pass.cc',
'simplification_pass.h', 'simplification_pass.h',
'validation_pass.cc',
'validation_pass.h',
'value_reduction_pass.cc', 'value_reduction_pass.cc',
'value_reduction_pass.h', 'value_reduction_pass.h',
], ],

View File

@ -0,0 +1,101 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/compiler/passes/validation_pass.h>
#include <alloy/backend/backend.h>
#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
using namespace alloy::frontend;
using namespace alloy::hir;
using namespace alloy::runtime;
ValidationPass::ValidationPass() :
CompilerPass() {
}
ValidationPass::~ValidationPass() {
}
int ValidationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
StringBuffer str;
builder->Dump(&str);
printf("%s", str.GetString());
fflush(stdout);
str.Reset();
auto block = builder->first_block();
while (block) {
auto label = block->label_head;
while (label) {
XEASSERT(label->block == block);
if (label->block != block) {
return 1;
}
label = label->next;
}
auto instr = block->instr_head;
while (instr) {
if (ValidateInstruction(block, instr)) {
return 1;
}
instr = instr->next;
}
block = block->next;
}
return 0;
}
int ValidationPass::ValidateInstruction(Block* block, Instr* instr) {
XEASSERT(instr->block == block);
if (instr->block != block) {
return 1;
}
uint32_t signature = instr->opcode->signature;
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
if (ValidateValue(block, instr, instr->src1.value)) {
return 1;
}
}
if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
if (ValidateValue(block, instr, instr->src2.value)) {
return 1;
}
}
if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
if (ValidateValue(block, instr, instr->src3.value)) {
return 1;
}
}
return 0;
}
int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) {
//if (value->def) {
// auto def = value->def;
// XEASSERT(def->block == block);
// if (def->block != block) {
// return 1;
// }
//}
return 0;
}

View File

@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_
#define ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_
#include <alloy/compiler/compiler_pass.h>
namespace alloy {
namespace compiler {
namespace passes {
class ValidationPass : public CompilerPass {
public:
ValidationPass();
virtual ~ValidationPass();
virtual int Run(hir::HIRBuilder* builder);
private:
int ValidateInstruction(hir::Block* block, hir::Instr* instr);
int ValidateValue(hir::Block* block, hir::Instr* instr, hir::Value* value);
};
} // namespace passes
} // namespace compiler
} // namespace alloy
#endif // ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_

View File

@ -13,7 +13,11 @@
#include <alloy/compiler/compiler.h> #include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h> #include <alloy/runtime/runtime.h>
#include <bitset> #pragma warning(push)
#pragma warning(disable : 4244)
#pragma warning(disable : 4267)
#include <llvm/ADT/BitVector.h>
#pragma warning(pop)
using namespace alloy; using namespace alloy;
using namespace alloy::backend; using namespace alloy::backend;
@ -49,10 +53,11 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
} }
int ValueReductionPass::Run(HIRBuilder* builder) { int ValueReductionPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Walk each block and reuse variable ordinals as much as possible. // Walk each block and reuse variable ordinals as much as possible.
// Let's hope this is enough. llvm::BitVector ordinals(builder->max_value_ordinal());
std::bitset<1024> ordinals;
auto block = builder->first_block(); auto block = builder->first_block();
while (block) { while (block) {
@ -74,34 +79,40 @@ int ValueReductionPass::Run(HIRBuilder* builder) {
OpcodeSignatureType src1_type = GET_OPCODE_SIG_TYPE_SRC1(info->signature); OpcodeSignatureType src1_type = GET_OPCODE_SIG_TYPE_SRC1(info->signature);
OpcodeSignatureType src2_type = GET_OPCODE_SIG_TYPE_SRC2(info->signature); OpcodeSignatureType src2_type = GET_OPCODE_SIG_TYPE_SRC2(info->signature);
OpcodeSignatureType src3_type = GET_OPCODE_SIG_TYPE_SRC3(info->signature); OpcodeSignatureType src3_type = GET_OPCODE_SIG_TYPE_SRC3(info->signature);
if (src1_type == OPCODE_SIG_TYPE_V && !instr->src1.value->IsConstant()) { if (src1_type == OPCODE_SIG_TYPE_V) {
auto v = instr->src1.value; auto v = instr->src1.value;
if (!v->last_use) { if (!v->last_use) {
ComputeLastUse(v); ComputeLastUse(v);
} }
if (v->last_use == instr) { if (v->last_use == instr) {
// Available. // Available.
ordinals.set(v->ordinal, false); if (!instr->src1.value->IsConstant()) {
ordinals.reset(v->ordinal);
} }
} }
if (src2_type == OPCODE_SIG_TYPE_V && !instr->src2.value->IsConstant()) { }
if (src2_type == OPCODE_SIG_TYPE_V) {
auto v = instr->src2.value; auto v = instr->src2.value;
if (!v->last_use) { if (!v->last_use) {
ComputeLastUse(v); ComputeLastUse(v);
} }
if (v->last_use == instr) { if (v->last_use == instr) {
// Available. // Available.
ordinals.set(v->ordinal, false); if (!instr->src2.value->IsConstant()) {
ordinals.reset(v->ordinal);
} }
} }
if (src3_type == OPCODE_SIG_TYPE_V && !instr->src3.value->IsConstant()) { }
if (src3_type == OPCODE_SIG_TYPE_V) {
auto v = instr->src3.value; auto v = instr->src3.value;
if (!v->last_use) { if (!v->last_use) {
ComputeLastUse(v); ComputeLastUse(v);
} }
if (v->last_use == instr) { if (v->last_use == instr) {
// Available. // Available.
ordinals.set(v->ordinal, false); if (!instr->src3.value->IsConstant()) {
ordinals.reset(v->ordinal);
}
} }
} }
if (dest_type == OPCODE_SIG_TYPE_V) { if (dest_type == OPCODE_SIG_TYPE_V) {
@ -109,7 +120,7 @@ int ValueReductionPass::Run(HIRBuilder* builder) {
// source value ordinal. // source value ordinal.
auto v = instr->dest; auto v = instr->dest;
// Find a lower ordinal. // Find a lower ordinal.
for (auto n = 0; n < ordinals.size(); n++) { for (auto n = 0u; n < ordinals.size(); n++) {
if (!ordinals.test(n)) { if (!ordinals.test(n)) {
ordinals.set(n); ordinals.set(n);
v->ordinal = n; v->ordinal = n;
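The patched loop is doing ordinal compaction: a value's bit is cleared once its last use passes, and each new dest claims the lowest clear bit. The claim step, sketched standalone with std::bitset standing in for llvm::BitVector (illustrative only):
#include <bitset>
#include <cstdint>
// Returns the lowest free ordinal and marks it live; the real pass sizes its
// bitvector to max_value_ordinal(), so a free slot should always exist.
uint32_t ClaimLowestOrdinal(std::bitset<1024>& ordinals) {
  for (uint32_t n = 0; n < ordinals.size(); ++n) {
    if (!ordinals.test(n)) {
      ordinals.set(n);
      return n;  // reuse an ordinal whose previous value is fully retired
    }
  }
  return UINT32_MAX;  // unreachable when sized generously
}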

View File

@ -27,10 +27,10 @@ public:
ALLOY_COMPILER_DEINIT = ALLOY_COMPILER | (2), ALLOY_COMPILER_DEINIT = ALLOY_COMPILER | (2),
}; };
typedef struct { typedef struct Init_s {
static const uint32_t event_type = ALLOY_COMPILER_INIT; static const uint32_t event_type = ALLOY_COMPILER_INIT;
} Init; } Init;
typedef struct { typedef struct Deinit_s {
static const uint32_t event_type = ALLOY_COMPILER_DEINIT; static const uint32_t event_type = ALLOY_COMPILER_DEINIT;
} Deinit; } Deinit;
}; };

View File

@ -44,7 +44,33 @@ typedef struct XECACHEALIGN vec128_s {
uint64_t high; uint64_t high;
}; };
}; };
bool operator== (const vec128_s& b) const {
return low == b.low && high == b.high;
}
} vec128_t; } vec128_t;
XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
vec128_t v;
v.i4[0] = x; v.i4[1] = y; v.i4[2] = z; v.i4[3] = w;
return v;
}
XEFORCEINLINE vec128_t vec128f(float x, float y, float z, float w) {
vec128_t v;
v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w;
return v;
}
XEFORCEINLINE vec128_t vec128b(
uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
uint8_t y0, uint8_t y1, uint8_t y2, uint8_t y3,
uint8_t z0, uint8_t z1, uint8_t z2, uint8_t z3,
uint8_t w0, uint8_t w1, uint8_t w2, uint8_t w3) {
vec128_t v;
v.b16[0] = x3; v.b16[1] = x2; v.b16[2] = x1; v.b16[3] = x0;
v.b16[4] = y3; v.b16[5] = y2; v.b16[6] = y1; v.b16[7] = y0;
v.b16[8] = z3; v.b16[9] = z2; v.b16[10] = z1; v.b16[11] = z0;
v.b16[12] = w3; v.b16[13] = w2; v.b16[14] = w1; v.b16[15] = w0;
return v;
}
} // namespace alloy } // namespace alloy
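The byte ordering in vec128b is easy to misread: each four-byte group is written into b16 reversed, so the last argument of a group lands at the lowest index. A hedged usage sketch (layout read off the constructor above):
// Hypothetical illustration, not in the tree:
vec128_t v = vec128b(0x00, 0x01, 0x02, 0x03,
                     0x10, 0x11, 0x12, 0x13,
                     0x20, 0x21, 0x22, 0x23,
                     0x30, 0x31, 0x32, 0x33);
// v.b16 = { 0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, ... }; on a
// little-endian host v.i4[0] == 0x00010203, i.e. the arguments give the
// big-endian byte order of each 32-bit word.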

View File

@ -11,6 +11,7 @@
#define ALLOY_DELEGATE_H_ #define ALLOY_DELEGATE_H_
#include <functional> #include <functional>
#include <vector>
#include <alloy/core.h> #include <alloy/core.h>
#include <alloy/mutex.h> #include <alloy/mutex.h>

View File

@ -67,6 +67,8 @@ typedef struct XECACHEALIGN64 PPCContext_s {
// Must be stored at 0x0 for now. // Must be stored at 0x0 for now.
// TODO(benvanik): find a nice way to describe this to the JIT. // TODO(benvanik): find a nice way to describe this to the JIT.
runtime::ThreadState* thread_state; runtime::ThreadState* thread_state;
// TODO(benvanik): this is getting nasty. Must be here.
uint8_t* membase;
// Most frequently used registers first. // Most frequently used registers first.
uint64_t r[32]; // General purpose registers uint64_t r[32]; // General purpose registers
@ -196,7 +198,6 @@ typedef struct XECACHEALIGN64 PPCContext_s {
// Runtime-specific data pointer. Used on callbacks to get access to the // Runtime-specific data pointer. Used on callbacks to get access to the
// current runtime and its data. // current runtime and its data.
uint8_t* membase;
runtime::Runtime* runtime; runtime::Runtime* runtime;
volatile int suspend_flag; volatile int suspend_flag;

View File

@ -115,7 +115,7 @@ void Disasm_X_RA_RB(InstrData& i, StringBuffer* str) {
i.X.RA, i.X.RB); i.X.RA, i.X.RB);
} }
void Disasm_XO_RT_RA_RB(InstrData& i, StringBuffer* str) { void Disasm_XO_RT_RA_RB(InstrData& i, StringBuffer* str) {
str->Append("%*s%s%s r%d, r%d", i.XO.Rc ? -7 : -8, i.type->name, str->Append("%*s%s%s r%d, r%d, r%d", i.XO.Rc ? -7 : -8, i.type->name,
i.XO.OE ? "o" : "", i.XO.Rc ? "." : "", i.XO.OE ? "o" : "", i.XO.Rc ? "." : "",
i.XO.RT, i.XO.RA, i.XO.RB); i.XO.RT, i.XO.RA, i.XO.RB);
} }
@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) {
} }
void Disasm_fcmp(InstrData& i, StringBuffer* str) { void Disasm_fcmp(InstrData& i, StringBuffer* str) {
str->Append("%-8s cr%d, r%d, r%d", i.type->name, str->Append("%-8s cr%d, f%d, f%d", i.type->name,
i.X.RT >> 2, i.X.RA, i.X.RB); i.X.RT >> 2, i.X.RA, i.X.RB);
} }

View File

@ -105,6 +105,10 @@ Value* CalculateEA_0(PPCHIRBuilder& f, uint32_t ra, uint32_t rb);
// } // }
unsigned int xerotl(unsigned int value, unsigned int shift) {
XEASSERT(shift < 32);
return shift == 0 ? value : ((value << shift) | (value >> (32 - shift)));
}
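// Hypothetical sanity check (not in the tree). The shift == 0 guard matters:
// without it the expression would evaluate value >> 32, which is undefined
// for a 32-bit type.
void TestXerotl() {
  XEASSERT(xerotl(0x04050607u, 8) == 0x05060704u);  // bytes rotate left one
  XEASSERT(xerotl(0x04050607u, 0) == 0x04050607u);  // guarded shift==0 path
}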
XEEMITTER(dst, 0x7C0002AC, XDSS)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(dst, 0x7C0002AC, XDSS)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
@ -1797,7 +1801,7 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, InstrData
// http://hlssmod.net/he_code/public/pixelwriter.h // http://hlssmod.net/he_code/public/pixelwriter.h
// control = prev:0123 | new:4567 // control = prev:0123 | new:4567
uint32_t control = 0x00010203; // original uint32_t control = 0x00010203; // original
uint32_t src = _rotl(0x04050607, shift * 8); uint32_t src = xerotl(0x04050607, shift * 8);
uint32_t mask = 0; uint32_t mask = 0;
switch (pack) { switch (pack) {
case 1: // VPACK_32 case 1: // VPACK_32

View File

@ -643,20 +643,20 @@ XEEMITTER(cmpli, 0x28000000, D )(PPCHIRBuilder& f, InstrData& i) {
XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) & (RB) // RA <- (RS) & (RB)
Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) & ¬(RB) // RA <- (RS) & ¬(RB)
Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB))); Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB)));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -665,8 +665,8 @@ XEEMITTER(andix, 0x70000000, D )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.And( Value* ra = f.And(
f.LoadGPR(i.D.RT), f.LoadGPR(i.D.RT),
f.LoadConstant((uint64_t)i.D.DS)); f.LoadConstant((uint64_t)i.D.DS));
f.UpdateCR(0, ra);
f.StoreGPR(i.D.RA, ra); f.StoreGPR(i.D.RA, ra);
f.UpdateCR(0, ra);
return 0; return 0;
} }
@ -675,8 +675,8 @@ XEEMITTER(andisx, 0x74000000, D )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.And( Value* ra = f.And(
f.LoadGPR(i.D.RT), f.LoadGPR(i.D.RT),
f.LoadConstant((uint64_t(i.D.DS) << 16))); f.LoadConstant((uint64_t(i.D.DS) << 16)));
f.UpdateCR(0, ra);
f.StoreGPR(i.D.RA, ra); f.StoreGPR(i.D.RA, ra);
f.UpdateCR(0, ra);
return 0; return 0;
} }
@ -688,10 +688,10 @@ XEEMITTER(cntlzdx, 0x7C000074, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- n // RA <- n
Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT)); Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT));
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.X.RA, v);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.X.RA, v);
return 0; return 0;
} }
@ -704,10 +704,10 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(PPCHIRBuilder& f, InstrData& i) {
Value* v = f.CountLeadingZeros( Value* v = f.CountLeadingZeros(
f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.X.RA, v);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.X.RA, v);
return 0; return 0;
} }
@ -715,10 +715,10 @@ XEEMITTER(eqvx, 0x7C000238, X )(PPCHIRBuilder& f, InstrData& i) {
// RA <- (RS) == (RB) // RA <- (RS) == (RB)
Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -728,10 +728,10 @@ XEEMITTER(extsbx, 0x7C000774, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:55] <- i56.s // RA[0:55] <- i56.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -741,10 +741,10 @@ XEEMITTER(extshx, 0x7C000734, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:47] <- 48.s // RA[0:47] <- 48.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -754,10 +754,10 @@ XEEMITTER(extswx, 0x7C0007B4, X )(PPCHIRBuilder& f, InstrData& i) {
// RA[0:31] <- i32.s // RA[0:31] <- i32.s
Value* rt = f.LoadGPR(i.X.RT); Value* rt = f.LoadGPR(i.X.RT);
rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE); rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE);
f.StoreGPR(i.X.RA, rt);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, rt); f.UpdateCR(0, rt);
} }
f.StoreGPR(i.X.RA, rt);
return 0; return 0;
} }
@ -767,10 +767,10 @@ XEEMITTER(nandx, 0x7C0003B8, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -780,10 +780,10 @@ XEEMITTER(norx, 0x7C0000F8, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
ra = f.Not(ra); ra = f.Not(ra);
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -803,10 +803,10 @@ XEEMITTER(orx, 0x7C000378, X )(PPCHIRBuilder& f, InstrData& i) {
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
} }
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -815,10 +815,10 @@ XEEMITTER(orcx, 0x7C000338, X )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.Or( Value* ra = f.Or(
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.Not(f.LoadGPR(i.X.RB))); f.Not(f.LoadGPR(i.X.RB)));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -849,10 +849,10 @@ XEEMITTER(xorx, 0x7C000278, X )(PPCHIRBuilder& f, InstrData& i) {
Value* ra = f.Xor( Value* ra = f.Xor(
f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RT),
f.LoadGPR(i.X.RB)); f.LoadGPR(i.X.RB));
f.StoreGPR(i.X.RA, ra);
if (i.X.Rc) { if (i.X.Rc) {
f.UpdateCR(0, ra); f.UpdateCR(0, ra);
} }
f.StoreGPR(i.X.RA, ra);
return 0; return 0;
} }
@ -895,10 +895,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
if (m != 0xFFFFFFFFFFFFFFFF) { if (m != 0xFFFFFFFFFFFFFFFF) {
v = f.And(v, f.LoadConstant(m)); v = f.And(v, f.LoadConstant(m));
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else if (i.MD.idx == 1) { } else if (i.MD.idx == 1) {
// XEEMITTER(rldicrx, 0x78000004, MD ) // XEEMITTER(rldicrx, 0x78000004, MD )
@ -922,10 +922,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant(m)); v = f.And(v, f.LoadConstant(m));
} }
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else if (i.MD.idx == 2) { } else if (i.MD.idx == 2) {
// XEEMITTER(rldicx, 0x78000008, MD ) // XEEMITTER(rldicx, 0x78000008, MD )
@ -959,10 +959,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
f.And(v, f.LoadConstant(m)), f.And(v, f.LoadConstant(m)),
f.And(ra, f.LoadConstant(~m))); f.And(ra, f.LoadConstant(~m)));
} }
f.StoreGPR(i.MD.RA, v);
if (i.MD.Rc) { if (i.MD.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.MD.RA, v);
return 0; return 0;
} else { } else {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
@ -987,10 +987,10 @@ XEEMITTER(rlwimix, 0x50000000, M )(PPCHIRBuilder& f, InstrData& i) {
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m)))); v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m))));
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1014,10 +1014,10 @@ XEEMITTER(rlwinmx, 0x54000000, M )(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1027,7 +1027,8 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
// m <- MASK(MB+32, ME+32) // m <- MASK(MB+32, ME+32)
// RA <- r & m // RA <- r & m
Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE); Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE);
Value* sh = f.And(f.LoadGPR(i.M.SH), f.LoadConstant(0x1F)); Value* sh = f.And(f.Truncate(f.LoadGPR(i.M.SH), INT32_TYPE),
f.LoadConstant(0x1F));
v = f.RotateLeft(v, sh); v = f.RotateLeft(v, sh);
// Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here
// as our truncation/zero-extend does it for us. // as our truncation/zero-extend does it for us.
@ -1035,10 +1036,10 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
} }
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.M.RA, v);
if (i.M.Rc) { if (i.M.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.M.RA, v);
return 0; return 0;
} }
@ -1145,7 +1146,7 @@ XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
// CA is set to 1 if the low-order 32 bits of (RS) contain a negative number // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
// and any 1-bits are shifted out of position 63; otherwise CA is set to 0. // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
// We already have ca set to indicate the pos 63 bit, now just and in sign. // We already have ca set to indicate the pos 63 bit, now just and in sign.
ca = f.And(ca, f.Shr(v, 63)); ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));
f.StoreCA(ca); f.StoreCA(ca);
f.StoreGPR(i.X.RA, v); f.StoreGPR(i.X.RA, v);
@ -1173,15 +1174,15 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
XEASSERT(sh); XEASSERT(sh);
uint64_t mask = XEMASK(64 - sh, 63); uint64_t mask = XEMASK(64 - sh, 63);
Value* ca = f.And( Value* ca = f.And(
f.Shr(v, 63), f.Truncate(f.Shr(v, 63), INT8_TYPE),
f.IsTrue(f.And(v, f.LoadConstant(mask)))); f.IsTrue(f.And(v, f.LoadConstant(mask))));
f.StoreCA(ca); f.StoreCA(ca);
v = f.Sha(v, sh); v = f.Sha(v, sh);
f.StoreGPR(i.XS.RA, v);
if (i.XS.Rc) { if (i.XS.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
} }
f.StoreGPR(i.XS.RA, v);
return 0; return 0;
} }
@ -1197,12 +1198,12 @@ XEEMITTER(srawx, 0x7C000630, X )(PPCHIRBuilder& f, InstrData& i) {
Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE); Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
Value* sh = f.And( Value* sh = f.And(
f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE), f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE),
f.LoadConstant((int8_t)0x7F)); f.LoadConstant(0x7F));
// CA is set if any bits are shifted out of the right and if the result // CA is set if any bits are shifted out of the right and if the result
// is negative. // is negative.
Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh)); Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
Value* ca = f.And( Value* ca = f.And(
f.Shr(v, 31), f.Truncate(f.Shr(v, 31), INT8_TYPE),
f.IsTrue(f.And(v, mask))); f.IsTrue(f.And(v, mask)));
f.StoreCA(ca); f.StoreCA(ca);
v = f.Sha(v, sh), v = f.Sha(v, sh),
@ -1234,8 +1235,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCHIRBuilder& f, InstrData& i) {
// is negative. // is negative.
uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63); uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63);
ca = f.And( ca = f.And(
f.Shr(v, 31), f.Truncate(f.Shr(v, 31), INT8_TYPE),
f.ZeroExtend(f.IsTrue(f.And(v, f.LoadConstant(mask))), INT32_TYPE)); f.IsTrue(f.And(v, f.LoadConstant(mask))));
v = f.Sha(v, (int8_t)i.X.RB), v = f.Sha(v, (int8_t)i.X.RB),
v = f.SignExtend(v, INT64_TYPE); v = f.SignExtend(v, INT64_TYPE);

View File

@ -35,6 +35,7 @@ int InstrEmit_branch(
// be correct for returns. // be correct for returns.
if (lk) { if (lk) {
Value* return_address = f.LoadConstant(cia + 4); Value* return_address = f.LoadConstant(cia + 4);
f.SetReturnAddress(return_address);
f.StoreLR(return_address); f.StoreLR(return_address);
} }
@ -104,6 +105,10 @@ int InstrEmit_branch(
// // TODO(benvanik): evaluate hint here. // // TODO(benvanik): evaluate hint here.
// c.je(e.GetReturnLabel(), kCondHintLikely); // c.je(e.GetReturnLabel(), kCondHintLikely);
//} //}
#if 0
// This breaks longjmp, as that uses blr with a non-return lr.
// It'd be nice to move SET_RETURN_ADDRESS semantics up into context
// so that we can just use this.
if (!lk && nia_is_lr) { if (!lk && nia_is_lr) {
// Return (most likely). // Return (most likely).
// TODO(benvanik): test? ReturnCheck()? // TODO(benvanik): test? ReturnCheck()?
@ -116,7 +121,14 @@ int InstrEmit_branch(
f.Return(); f.Return();
} }
} else { } else {
#else
{
#endif
// Jump to pointer. // Jump to pointer.
bool likely_return = !lk && nia_is_lr;
if (likely_return) {
call_flags |= CALL_POSSIBLE_RETURN;
}
if (cond) { if (cond) {
if (!expect_true) { if (!expect_true) {
cond = f.IsFalse(cond); cond = f.IsFalse(cond);
@ -380,8 +392,8 @@ XEEMITTER(mcrf, 0x4C000000, XL )(PPCHIRBuilder& f, InstrData& i) {
// System linkage (A-24) // System linkage (A-24)
XEEMITTER(sc, 0x44000002, SC )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(sc, 0x44000002, SC )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); f.CallExtern(f.symbol_info());
return 1; return 0;
} }

View File

@ -891,7 +891,8 @@ XEEMITTER(stfiwx, 0x7C0007AE, X )(PPCHIRBuilder& f, InstrData& i) {
// EA <- b + (RB) // EA <- b + (RB)
// MEM(EA, 4) <- (FRS)[32:63] // MEM(EA, 4) <- (FRS)[32:63]
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
f.Store(ea, f.ByteSwap(f.Cast(f.LoadFPR(i.X.RT), INT32_TYPE))); f.Store(ea, f.ByteSwap(
f.Truncate(f.Cast(f.LoadFPR(i.X.RT), INT64_TYPE), INT32_TYPE)));
return 0; return 0;
} }

View File

@ -9,6 +9,7 @@
#include <alloy/frontend/ppc/ppc_hir_builder.h> #include <alloy/frontend/ppc/ppc_hir_builder.h>
#include <alloy/alloy-private.h>
#include <alloy/frontend/tracing.h> #include <alloy/frontend/tracing.h>
#include <alloy/frontend/ppc/ppc_context.h> #include <alloy/frontend/ppc/ppc_context.h>
#include <alloy/frontend/ppc/ppc_disasm.h> #include <alloy/frontend/ppc/ppc_disasm.h>
@ -43,6 +44,8 @@ void PPCHIRBuilder::Reset() {
} }
int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) { int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
SCOPE_profile_cpu_f("alloy");
Memory* memory = frontend_->memory(); Memory* memory = frontend_->memory();
const uint8_t* p = memory->membase(); const uint8_t* p = memory->membase();
@ -125,10 +128,10 @@ int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
typedef int (*InstrEmitter)(PPCHIRBuilder& f, InstrData& i); typedef int (*InstrEmitter)(PPCHIRBuilder& f, InstrData& i);
InstrEmitter emit = (InstrEmitter)i.type->emit; InstrEmitter emit = (InstrEmitter)i.type->emit;
/*if (i.address == FLAGS_break_on_instruction) { if (i.address == FLAGS_break_on_instruction) {
Comment("--break-on-instruction target"); Comment("--break-on-instruction target");
DebugBreak(); DebugBreak();
}*/ }
if (!i.type->emit || emit(*this, i)) { if (!i.type->emit || emit(*this, i)) {
XELOGCPU("Unimplemented instr %.8X %.8X %s", XELOGCPU("Unimplemented instr %.8X %.8X %s",
@ -239,18 +242,18 @@ void PPCHIRBuilder::UpdateCR(
void PPCHIRBuilder::UpdateCR( void PPCHIRBuilder::UpdateCR(
uint32_t n, Value* lhs, Value* rhs, bool is_signed) { uint32_t n, Value* lhs, Value* rhs, bool is_signed) {
Value* lt;
Value* gt;
if (is_signed) { if (is_signed) {
lt = CompareSLT(lhs, rhs); Value* lt = CompareSLT(lhs, rhs);
gt = CompareSGT(lhs, rhs); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
Value* gt = CompareSGT(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
} else { } else {
lt = CompareULT(lhs, rhs); Value* lt = CompareULT(lhs, rhs);
gt = CompareUGT(lhs, rhs); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
Value* gt = CompareUGT(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
} }
Value* eq = CompareEQ(lhs, rhs); Value* eq = CompareEQ(lhs, rhs);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq);
// Value* so = AllocValue(UINT8_TYPE); // Value* so = AllocValue(UINT8_TYPE);
@ -279,6 +282,7 @@ Value* PPCHIRBuilder::LoadCA() {
} }
void PPCHIRBuilder::StoreCA(Value* value) { void PPCHIRBuilder::StoreCA(Value* value) {
XEASSERT(value->type == INT8_TYPE);
StoreContext(offsetof(PPCContext, xer_ca), value); StoreContext(offsetof(PPCContext, xer_ca), value);
} }
@ -287,6 +291,7 @@ Value* PPCHIRBuilder::LoadSAT() {
} }
void PPCHIRBuilder::StoreSAT(Value* value) { void PPCHIRBuilder::StoreSAT(Value* value) {
value = Truncate(value, INT8_TYPE);
StoreContext(offsetof(PPCContext, vscr_sat), value); StoreContext(offsetof(PPCContext, vscr_sat), value);
} }

View File

@ -38,6 +38,8 @@ bool PPCScanner::IsRestGprLr(uint64_t address) {
} }
int PPCScanner::FindExtents(FunctionInfo* symbol_info) { int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
SCOPE_profile_cpu_f("alloy");
// This is a simple basic block analyizer. It walks the start address to the // This is a simple basic block analyizer. It walks the start address to the
// end address looking for branches. Each span of instructions between // end address looking for branches. Each span of instructions between
// branches is considered a basic block. When the last blr (that has no // branches is considered a basic block. When the last blr (that has no
@ -286,6 +288,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
}
std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) {
+  SCOPE_profile_cpu_f("alloy");
  Memory* memory = frontend_->memory();
  const uint8_t* p = memory->membase();

View File

@ -38,20 +38,38 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
  assembler_ = backend->CreateAssembler();
  assembler_->Initialize();

+  bool validate = FLAGS_validate_hir;
+
+  // Build the CFG first.
+  compiler_->AddPass(new passes::ControlFlowAnalysisPass());
+
  // Passes are executed in the order they are added. Multiple of the same
  // pass type may be used.
+  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::ContextPromotionPass());
+  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::SimplificationPass());
-  // TODO(benvanik): run repeatedly?
+  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::ConstantPropagationPass());
-  //compiler_->AddPass(new passes::TypePropagationPass());
-  //compiler_->AddPass(new passes::ByteSwapEliminationPass());
+  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::SimplificationPass());
+  if (validate) compiler_->AddPass(new passes::ValidationPass());
  //compiler_->AddPass(new passes::DeadStoreEliminationPass());
+  //if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::DeadCodeEliminationPass());
+  if (validate) compiler_->AddPass(new passes::ValidationPass());

-  // Removes all unneeded variables. Try not to add new ones after this.
-  compiler_->AddPass(new passes::ValueReductionPass());
+  //// Removes all unneeded variables. Try not to add new ones after this.
+  //compiler_->AddPass(new passes::ValueReductionPass());
+  //if (validate) compiler_->AddPass(new passes::ValidationPass());
+
+  // Register allocation for the target backend.
+  // Will modify the HIR to add loads/stores.
+  // This should be the last pass before finalization, as after this all
+  // registers are assigned and ready to be emitted.
+  compiler_->AddPass(new passes::RegisterAllocationPass(
+      backend->machine_info()));
+  if (validate) compiler_->AddPass(new passes::ValidationPass());

  // Must come last. The HIR is not really HIR after this.
  compiler_->AddPass(new passes::FinalizationPass());
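The ValidationPass instances gated on --validate_hir catch a pass that corrupts the HIR immediately after it runs, rather than as a mysterious failure at emission time. A hypothetical convenience wrapper (not in this diff) makes the pattern explicit:

    // Sketch: add a transform pass, then a checker when validation is on.
    template <typename PassT>
    void AddValidatedPass(Compiler* compiler, PassT* pass, bool validate) {
      compiler->AddPass(pass);
      if (validate) {
        compiler->AddPass(new passes::ValidationPass());
      }
    }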
@ -68,6 +86,8 @@ int PPCTranslator::Translate(
    FunctionInfo* symbol_info,
    uint32_t debug_info_flags,
    Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
  // Scan the function to find its extents. We only need to do this if we
  // haven't already been provided with them from some other source.
  if (!symbol_info->has_end_address()) {

View File

@ -27,10 +27,10 @@ public:
  ALLOY_FRONTEND_DEINIT = ALLOY_FRONTEND | (2),
};
-  typedef struct {
+  typedef struct Init_s {
    static const uint32_t event_type = ALLOY_FRONTEND_INIT;
  } Init;
-  typedef struct {
+  typedef struct Deinit_s {
    static const uint32_t event_type = ALLOY_FRONTEND_DEINIT;
  } Deinit;
};

src/alloy/hir/block.cc Normal file
View File

@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/hir/block.h>
#include <alloy/hir/instr.h>
using namespace alloy;
using namespace alloy::hir;
void Block::AssertNoCycles() {
Instr* hare = instr_head;
Instr* tortoise = instr_head;
if (!hare) {
return;
}
while (hare = hare->next) {
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
hare = hare->next;
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
tortoise = tortoise->next;
if (!hare || !tortoise) {
return;
}
}
}
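Block::AssertNoCycles is Floyd's tortoise-and-hare applied to the intrusive instruction list: the hare advances two links per iteration, the tortoise one, and the two pointers can only meet if the list loops back on itself. The same check over any singly linked node type, as a generic sketch (HasCycle and Node are illustrative names):

    // Floyd cycle detection over a singly linked list (sketch).
    template <typename Node>
    bool HasCycle(const Node* head) {
      const Node* slow = head;
      const Node* fast = head;
      while (fast && fast->next) {
        slow = slow->next;        // one step
        fast = fast->next->next;  // two steps
        if (slow == fast) {
          return true;            // pointers met inside a loop
        }
      }
      return false;               // fast fell off the end: acyclic
    }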

View File

@ -12,15 +12,37 @@
#include <alloy/core.h>
+XEDECLARECLASS1(llvm, BitVector);
namespace alloy {
namespace hir {
+class Block;
class HIRBuilder;
class Instr;
class Label;
class Edge {
public:
enum EdgeFlags {
UNCONDITIONAL = (1 << 0),
DOMINATES = (1 << 1),
};
public:
Edge* outgoing_next;
Edge* outgoing_prev;
Edge* incoming_next;
Edge* incoming_prev;
Block* src;
Block* dest;
uint32_t flags;
};
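Edges are stored intrusively: each Edge lives on two singly linked lists at once, the source block's outgoing list and the destination block's incoming list, threaded through the outgoing_*/incoming_* fields. Walking a block's predecessors is then pure pointer-chasing with no allocation; a sketch:

    // Count a block's predecessors and dominator edges (sketch).
    int CountIncoming(const Block* block, int* out_dominating) {
      int count = 0;
      *out_dominating = 0;
      for (const Edge* e = block->incoming_edge_head; e; e = e->incoming_next) {
        ++count;
        if (e->flags & Edge::DOMINATES) {
          ++(*out_dominating);
        }
      }
      return count;
    }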
class Block {
public:
  Arena* arena;
@ -28,6 +50,10 @@ public:
  Block* next;
  Block* prev;
+  Edge* incoming_edge_head;
+  Edge* outgoing_edge_head;
+  llvm::BitVector* incoming_values;
  Label* label_head;
  Label* label_tail;
@ -35,6 +61,8 @@ public:
  Instr* instr_tail;
  uint16_t ordinal;
+  void AssertNoCycles();
};

View File

@ -41,6 +41,7 @@ void HIRBuilder::Reset() {
  attributes_ = 0;
  next_label_id_ = 0;
  next_value_ordinal_ = 0;
+  locals_.clear();
  block_head_ = block_tail_ = NULL;
  current_block_ = NULL;
#if XE_DEBUG
@ -50,6 +51,8 @@ void HIRBuilder::Reset() {
}
int HIRBuilder::Finalize() {
+  SCOPE_profile_cpu_f("alloy");
  // Scan blocks in order and add fallthrough branches. These are needed for
  // analysis passes to work. We may have also added blocks out of order and
  // need to ensure they fall through in the right order.
@ -71,7 +74,7 @@ int HIRBuilder::Finalize() {
      // No following block.
      // Sometimes VC++ generates functions with bl at the end even if they
      // will never return. Just add a return to satisfy things.
-      XELOGW("Fall-through out of the function.");
+      //XELOGW("Fall-through out of the function.");
      Trap();
      Return();
      current_block_ = NULL;
@ -91,7 +94,7 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
    case INT8_TYPE: str->Append("%X", value->constant.i8); break;
    case INT16_TYPE: str->Append("%X", value->constant.i16); break;
    case INT32_TYPE: str->Append("%X", value->constant.i32); break;
-    case INT64_TYPE: str->Append("%X", value->constant.i64); break;
+    case INT64_TYPE: str->Append("%llX", value->constant.i64); break;
    case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break;
    case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break;
    case VEC128_TYPE: str->Append("(%F,%F,%F,%F)",
@ -107,6 +110,9 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
    };
    str->Append("v%d.%s", value->ordinal, type_names[value->type]);
  }
+  if (value->reg.index != -1) {
+    str->Append("<%s%d>", value->reg.set->name, value->reg.index);
+  }
}
void HIRBuilder::DumpOp(
@ -137,10 +143,19 @@ void HIRBuilder::DumpOp(
} }
void HIRBuilder::Dump(StringBuffer* str) {
+  SCOPE_profile_cpu_f("alloy");
  if (attributes_) {
    str->Append("; attributes = %.8X\n", attributes_);
  }
+  for (auto it = locals_.begin(); it != locals_.end(); ++it) {
+    auto local = *it;
+    str->Append("  ; local ");
+    DumpValue(str, local);
+    str->Append("\n");
+  }
  uint32_t block_ordinal = 0;
  Block* block = block_head_;
  while (block) {
@ -161,6 +176,39 @@ void HIRBuilder::Dump(StringBuffer* str) {
      label = label->next;
    }
Edge* incoming_edge = block->incoming_edge_head;
while (incoming_edge) {
auto src_label = incoming_edge->src->label_head;
if (src_label && src_label->name) {
str->Append(" ; in: %s", src_label->name);
} else if (src_label) {
str->Append(" ; in: label%d", src_label->id);
} else {
str->Append(" ; in: <block%d>",
incoming_edge->src->ordinal);
}
str->Append(", dom:%d, uncond:%d\n",
(incoming_edge->flags & Edge::DOMINATES) ? 1 : 0,
(incoming_edge->flags & Edge::UNCONDITIONAL) ? 1 : 0);
incoming_edge = incoming_edge->incoming_next;
}
Edge* outgoing_edge = block->outgoing_edge_head;
while (outgoing_edge) {
auto dest_label = outgoing_edge->dest->label_head;
if (dest_label && dest_label->name) {
str->Append(" ; out: %s", dest_label->name);
} else if (dest_label) {
str->Append(" ; out: label%d", dest_label->id);
} else {
str->Append(" ; out: <block%d>",
outgoing_edge->dest->ordinal);
}
str->Append(", dom:%d, uncond:%d\n",
(outgoing_edge->flags & Edge::DOMINATES) ? 1 : 0,
(outgoing_edge->flags & Edge::UNCONDITIONAL) ? 1 : 0);
outgoing_edge = outgoing_edge->outgoing_next;
}
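With these loops each block's dump is annotated with its CFG neighborhood. For illustration (label numbers hypothetical), a block with one conditional predecessor and one dominated, unconditional successor would print roughly:

    ; in: label12, dom:0, uncond:0
    ; out: label13, dom:1, uncond:1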
    Instr* i = block->instr_head;
    while (i) {
      if (i->opcode->flags & OPCODE_FLAG_HIDE) {
@ -208,6 +256,29 @@ void HIRBuilder::Dump(StringBuffer* str) {
  }
}
void HIRBuilder::AssertNoCycles() {
Block* hare = block_head_;
Block* tortoise = block_head_;
if (!hare) {
return;
}
while (hare = hare->next) {
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
hare = hare->next;
if (hare == tortoise) {
// Cycle!
XEASSERTALWAYS();
}
tortoise = tortoise->next;
if (!hare || !tortoise) {
return;
}
}
}
Block* HIRBuilder::current_block() const {
  return current_block_;
}
@ -303,6 +374,7 @@ void HIRBuilder::InsertLabel(Label* label, Instr* prev_instr) {
    block_tail_ = new_block;
  }
  new_block->label_head = new_block->label_tail = label;
+  new_block->incoming_edge_head = new_block->outgoing_edge_head = NULL;
  label->block = new_block;
  label->prev = label->next = NULL;
@ -319,8 +391,7 @@ void HIRBuilder::InsertLabel(Label* label, Instr* prev_instr) {
    new_block->instr_tail = old_prev_tail;
  }
-  for (auto instr = new_block->instr_head; instr != new_block->instr_tail;
-       instr = instr->next) {
+  for (auto instr = new_block->instr_head; instr; instr = instr->next) {
    instr->block = new_block;
  }
@ -342,6 +413,19 @@ void HIRBuilder::ResetLabelTags() {
  }
}
void HIRBuilder::AddEdge(Block* src, Block* dest, uint32_t flags) {
Edge* edge = arena_->Alloc<Edge>();
edge->src = src;
edge->dest = dest;
edge->flags = flags;
edge->outgoing_prev = NULL;
edge->outgoing_next = src->outgoing_edge_head;
src->outgoing_edge_head = edge;
edge->incoming_prev = NULL;
edge->incoming_next = dest->incoming_edge_head;
dest->incoming_edge_head = edge;
}
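AddEdge splices the arena-allocated Edge onto the heads of both lists in O(1); nothing is ever unlinked, so edges live exactly as long as the builder's arena. The new ControlFlowAnalysisPass registered in ppc_translator.cc is the expected caller; a conditional branch would be recorded with two successor edges, roughly (sketch, flag values illustrative):

    // branch_true cond, label_taken -- record both outgoing edges:
    builder->AddEdge(block, label_taken->block, 0);
    builder->AddEdge(block, fallthrough_block, 0);  // implicit fallthrough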
Block* HIRBuilder::AppendBlock() {
  Block* block = arena_->Alloc<Block>();
  block->arena = arena_;
@ -356,6 +440,7 @@ Block* HIRBuilder::AppendBlock() {
  }
  current_block_ = block;
  block->label_head = block->label_tail = NULL;
+  block->incoming_edge_head = block->outgoing_edge_head = NULL;
  block->instr_head = block->instr_tail = NULL;
  return block;
}
@ -398,6 +483,7 @@ Instr* HIRBuilder::AppendInstr(
  if (!block->instr_head) {
    block->instr_head = instr;
  }
+  instr->ordinal = -1;
  instr->block = block;
  instr->opcode = &opcode_info;
  instr->flags = flags;
@ -420,8 +506,10 @@ Value* HIRBuilder::AllocValue(TypeName type) {
  value->def = NULL;
  value->use_head = NULL;
  value->last_use = NULL;
+  value->local_slot = NULL;
  value->tag = NULL;
-  value->reg = -1;
+  value->reg.set = NULL;
+  value->reg.index = -1;
  return value;
}
@ -434,8 +522,10 @@ Value* HIRBuilder::CloneValue(Value* source) {
  value->def = NULL;
  value->use_head = NULL;
  value->last_use = NULL;
+  value->local_slot = NULL;
  value->tag = NULL;
-  value->reg = -1;
+  value->reg.set = NULL;
+  value->reg.index = -1;
  return value;
}
@ -557,6 +647,13 @@ void HIRBuilder::CallIndirectTrue(
  EndBlock();
}
void HIRBuilder::CallExtern(FunctionInfo* symbol_info) {
Instr* i = AppendInstr(OPCODE_CALL_EXTERN_info, 0);
i->src1.symbol_info = symbol_info;
i->src2.value = i->src3.value = NULL;
EndBlock();
}
void HIRBuilder::Return() {
  Instr* i = AppendInstr(OPCODE_RETURN_info, 0);
  i->src1.value = i->src2.value = i->src3.value = NULL;
@ -578,6 +675,12 @@ void HIRBuilder::ReturnTrue(Value* cond) {
  EndBlock();
}
void HIRBuilder::SetReturnAddress(Value* value) {
Instr* i = AppendInstr(OPCODE_SET_RETURN_ADDRESS_info, 0);
i->set_src1(value);
i->src2.value = i->src3.value = NULL;
}
void HIRBuilder::Branch(Label* label, uint32_t branch_flags) {
  Instr* i = AppendInstr(OPCODE_BRANCH_info, branch_flags);
  i->src1.label = label;
@ -870,6 +973,28 @@ Value* HIRBuilder::LoadClock() {
  return i->dest;
}
Value* HIRBuilder::AllocLocal(TypeName type) {
Value* slot = AllocValue(type);
locals_.push_back(slot);
return slot;
}
Value* HIRBuilder::LoadLocal(Value* slot) {
Instr* i = AppendInstr(
OPCODE_LOAD_LOCAL_info, 0,
AllocValue(slot->type));
i->set_src1(slot);
i->src2.value = i->src3.value = NULL;
return i->dest;
}
void HIRBuilder::StoreLocal(Value* slot, Value* value) {
Instr* i = AppendInstr(OPCODE_STORE_LOCAL_info, 0);
i->set_src1(slot);
i->set_src2(value);
i->src3.value = NULL;
}
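Locals give the frontend a way to name stack slots that survive until the RegisterAllocationPass lowers them into real loads and stores. A sketch of how an emitter might round-trip a value through a slot (LoadGPR is a PPC-frontend helper assumed here, not shown in this diff):

    // Allocate a 64-bit slot, spill into it, reload it later (sketch).
    Value* slot = f.AllocLocal(INT64_TYPE);
    f.StoreLocal(slot, f.LoadGPR(3));     // store_local slot, v(r3)
    // ... other emission happens here ...
    Value* restored = f.LoadLocal(slot);  // load_local slot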
Value* HIRBuilder::LoadContext(size_t offset, TypeName type) {
  Instr* i = AppendInstr(
      OPCODE_LOAD_CONTEXT_info, 0,
@ -1631,16 +1756,19 @@ Value* HIRBuilder::Extract(Value* value, Value* index,
                            TypeName target_type) {
  // TODO(benvanik): could do some of this as constants.
+  Value* trunc_index = index->type != INT8_TYPE ?
+      Truncate(index, INT8_TYPE) : index;
  Instr* i = AppendInstr(
      OPCODE_EXTRACT_info, 0,
      AllocValue(target_type));
  i->set_src1(value);
-  i->set_src2(ZeroExtend(index, INT64_TYPE));
+  i->set_src2(trunc_index);
  i->src3.value = NULL;
  return i->dest;
}
-Value* HIRBuilder::Extract(Value* value, uint64_t index,
+Value* HIRBuilder::Extract(Value* value, uint8_t index,
                           TypeName target_type) {
  return Extract(value, LoadConstant(index), target_type);
}

View File

@ -35,13 +35,19 @@ public:
  virtual int Finalize();
  void Dump(StringBuffer* str);
+  void AssertNoCycles();
  Arena* arena() const { return arena_; }
  uint32_t attributes() const { return attributes_; }
  void set_attributes(uint32_t value) { attributes_ = value; }
+  std::vector<Value*>& locals() { return locals_; }
+  uint32_t max_value_ordinal() const { return next_value_ordinal_; }
  Block* first_block() const { return block_head_; }
+  Block* last_block() const { return block_tail_; }
  Block* current_block() const;
  Instr* last_instr() const;
@ -50,12 +56,11 @@ public:
  void InsertLabel(Label* label, Instr* prev_instr);
  void ResetLabelTags();
+  void AddEdge(Block* src, Block* dest, uint32_t flags);
  // static allocations:
  // Value* AllocStatic(size_t length);
-  // stack allocations:
-  // Value* AllocLocal(TypeName type);
  void Comment(const char* format, ...);
  void Nop();
@ -74,8 +79,10 @@ public:
      uint32_t call_flags = 0);
  void CallIndirect(Value* value, uint32_t call_flags = 0);
  void CallIndirectTrue(Value* cond, Value* value, uint32_t call_flags = 0);
+  void CallExtern(runtime::FunctionInfo* symbol_info);
  void Return();
  void ReturnTrue(Value* cond);
+  void SetReturnAddress(Value* value);
  void Branch(Label* label, uint32_t branch_flags = 0);
  void Branch(Block* block, uint32_t branch_flags = 0);
@ -115,6 +122,10 @@ public:
  Value* LoadClock();
+  Value* AllocLocal(TypeName type);
+  Value* LoadLocal(Value* slot);
+  void StoreLocal(Value* slot, Value* value);
  Value* LoadContext(size_t offset, TypeName type);
  void StoreContext(size_t offset, Value* value);
@ -186,7 +197,7 @@ public:
  Value* Insert(Value* value, Value* index, Value* part);
  Value* Insert(Value* value, uint64_t index, Value* part);
  Value* Extract(Value* value, Value* index, TypeName target_type);
-  Value* Extract(Value* value, uint64_t index, TypeName target_type);
+  Value* Extract(Value* value, uint8_t index, TypeName target_type);
  // i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...)
  // i8/i16/i32 -> vec128
  Value* Splat(Value* value, TypeName target_type);
@ -229,6 +240,8 @@ protected:
  uint32_t next_label_id_;
  uint32_t next_value_ordinal_;
+  std::vector<Value*> locals_;
  Block* block_head_;
  Block* block_tail_;
  Block* current_block_;

View File

@ -48,17 +48,34 @@ void Instr::set_src3(Value* value) {
  src3_use = value ? value->AddUse(block->arena, this) : NULL;
}
-bool Instr::Match(SignatureType dest_req,
-                  SignatureType src1_req,
-                  SignatureType src2_req,
-                  SignatureType src3_req) const {
-#define TO_SIG_TYPE(v) \
-    (v ? (v->IsConstant() ? SignatureType((v->type + 1) | SIG_TYPE_C) : SignatureType(v->type + 1)) : SIG_TYPE_X)
-  return
-      ((dest_req == SIG_TYPE_IGNORE) || (dest_req == TO_SIG_TYPE(dest))) &&
-      ((src1_req == SIG_TYPE_IGNORE) || (src1_req == TO_SIG_TYPE(src1.value))) &&
-      ((src2_req == SIG_TYPE_IGNORE) || (src2_req == TO_SIG_TYPE(src2.value))) &&
-      ((src3_req == SIG_TYPE_IGNORE) || (src3_req == TO_SIG_TYPE(src3.value)));
-}
+void Instr::MoveBefore(Instr* other) {
+  if (next == other) {
+    return;
+  }
+
+  // Remove from current location.
+  if (prev) {
+    prev->next = next;
+  } else {
+    block->instr_head = next;
+  }
+  if (next) {
+    next->prev = prev;
+  } else {
+    block->instr_tail = prev;
+  }
+
+  // Insert into new location.
+  block = other->block;
+  next = other;
+  prev = other->prev;
+  other->prev = this;
+  if (prev) {
+    prev->next = this;
+  }
+  if (other == block->instr_head) {
+    block->instr_head = this;
+  }
+}
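Match was a signature pattern-matcher used by the old constant-propagation plumbing; MoveBefore replaces it with a doubly-linked-list splice that later passes can use to reorder instructions without reallocating, even across blocks. Typical use, hypothetically:

    // Pull an instruction up so it executes right before its first use:
    load_instr->MoveBefore(first_use_instr);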
void Instr::Replace(const OpcodeInfo* opcode, uint16_t flags) {

View File

@ -24,26 +24,6 @@ namespace hir {
class Block;
class Label;
-enum SignatureType {
-  SIG_TYPE_X = 0,
-  SIG_TYPE_I8 = 1,
-  SIG_TYPE_I16 = 2,
-  SIG_TYPE_I32 = 3,
-  SIG_TYPE_I64 = 4,
-  SIG_TYPE_F32 = 5,
-  SIG_TYPE_F64 = 6,
-  SIG_TYPE_V128 = 7,
-  SIG_TYPE_C = (1 << 3),
-  SIG_TYPE_I8C = SIG_TYPE_C | SIG_TYPE_I8,
-  SIG_TYPE_I16C = SIG_TYPE_C | SIG_TYPE_I16,
-  SIG_TYPE_I32C = SIG_TYPE_C | SIG_TYPE_I32,
-  SIG_TYPE_I64C = SIG_TYPE_C | SIG_TYPE_I64,
-  SIG_TYPE_F32C = SIG_TYPE_C | SIG_TYPE_F32,
-  SIG_TYPE_F64C = SIG_TYPE_C | SIG_TYPE_F64,
-  SIG_TYPE_V128C = SIG_TYPE_C | SIG_TYPE_V128,
-  SIG_TYPE_IGNORE = 0xFF,
-};
class Instr {
public:
  Block* block;
@ -52,7 +32,7 @@ public:
  const OpcodeInfo* opcode;
  uint16_t flags;
-  uint16_t ordinal;
+  uint32_t ordinal;
  typedef union {
    runtime::FunctionInfo* symbol_info;
@ -74,11 +54,7 @@ public:
  void set_src2(Value* value);
  void set_src3(Value* value);
-  bool Match(SignatureType dest = SIG_TYPE_X,
-             SignatureType src1 = SIG_TYPE_X,
-             SignatureType src2 = SIG_TYPE_X,
-             SignatureType src3 = SIG_TYPE_X) const;
+  void MoveBefore(Instr* other);
  void Replace(const OpcodeInfo* opcode, uint16_t flags);
  void Remove();
};
View File

@ -19,6 +19,7 @@ namespace hir {
enum CallFlags {
  CALL_TAIL = (1 << 1),
+  CALL_POSSIBLE_RETURN = (1 << 2),
};
enum BranchFlags {
  BRANCH_LIKELY = (1 << 1),
@ -94,8 +95,10 @@ enum Opcode {
  OPCODE_CALL_TRUE,
  OPCODE_CALL_INDIRECT,
  OPCODE_CALL_INDIRECT_TRUE,
+  OPCODE_CALL_EXTERN,
  OPCODE_RETURN,
  OPCODE_RETURN_TRUE,
+  OPCODE_SET_RETURN_ADDRESS,
  OPCODE_BRANCH,
  OPCODE_BRANCH_TRUE,
@ -116,6 +119,9 @@ enum Opcode {
  OPCODE_LOAD_CLOCK,
+  OPCODE_LOAD_LOCAL,
+  OPCODE_STORE_LOCAL,
  OPCODE_LOAD_CONTEXT,
  OPCODE_STORE_CONTEXT,
@ -201,6 +207,7 @@ enum OpcodeFlags {
  OPCODE_FLAG_VOLATILE = (1 << 4),
  OPCODE_FLAG_IGNORE = (1 << 5),
  OPCODE_FLAG_HIDE = (1 << 6),
+  OPCODE_FLAG_PAIRED_PREV = (1 << 7),
};
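OPCODE_FLAG_PAIRED_PREV marks opcodes such as did_carry that read a host flag produced by the immediately preceding instruction, so optimization passes must not separate the pair. A PPC carrying-add emitter would produce something like this (sketch; LoadGPR/StoreGPR/DidCarry are assumed frontend/builder helpers, not defined in this diff):

    // addc rD, rA, rB -- sum plus carry-out into XER[CA] (sketch):
    Value* v = f.Add(f.LoadGPR(i.XO.RA), f.LoadGPR(i.XO.RB));
    f.StoreGPR(i.XO.RT, v);
    f.StoreCA(f.DidCarry(v));  // did_carry must stay glued to the add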
enum OpcodeSignatureType {

View File

@ -11,566 +11,590 @@
DEFINE_OPCODE(
    OPCODE_COMMENT,
    "comment",
-    OPCODE_SIG_X,
-    OPCODE_FLAG_IGNORE);
+    OPCODE_SIG_X_O,
+    OPCODE_FLAG_IGNORE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NOP, OPCODE_NOP,
"nop", "nop",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_IGNORE); OPCODE_FLAG_IGNORE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SOURCE_OFFSET, OPCODE_SOURCE_OFFSET,
"source_offset", "source_offset",
OPCODE_SIG_X_O, OPCODE_SIG_X_O,
OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE); OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DEBUG_BREAK, OPCODE_DEBUG_BREAK,
"debug_break", "debug_break",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DEBUG_BREAK_TRUE, OPCODE_DEBUG_BREAK_TRUE,
"debug_break_true", "debug_break_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRAP, OPCODE_TRAP,
"trap", "trap",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRAP_TRUE, OPCODE_TRAP_TRUE,
"trap_true", "trap_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL, OPCODE_CALL,
"call", "call",
OPCODE_SIG_X_S, OPCODE_SIG_X_S,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_TRUE, OPCODE_CALL_TRUE,
"call_true", "call_true",
OPCODE_SIG_X_V_S, OPCODE_SIG_X_V_S,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_INDIRECT, OPCODE_CALL_INDIRECT,
"call_indirect", "call_indirect",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CALL_INDIRECT_TRUE, OPCODE_CALL_INDIRECT_TRUE,
"call_indirect_true", "call_indirect_true",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE(
OPCODE_CALL_EXTERN,
"call_extern",
OPCODE_SIG_X_S,
OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RETURN, OPCODE_RETURN,
"return", "return",
OPCODE_SIG_X, OPCODE_SIG_X,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RETURN_TRUE, OPCODE_RETURN_TRUE,
"return_true", "return_true",
OPCODE_SIG_X_V, OPCODE_SIG_X_V,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE(
OPCODE_SET_RETURN_ADDRESS,
"set_return_address",
OPCODE_SIG_X_V,
0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH, OPCODE_BRANCH,
"branch", "branch",
OPCODE_SIG_X_L, OPCODE_SIG_X_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH_TRUE, OPCODE_BRANCH_TRUE,
"branch_true", "branch_true",
OPCODE_SIG_X_V_L, OPCODE_SIG_X_V_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BRANCH_FALSE, OPCODE_BRANCH_FALSE,
"branch_false", "branch_false",
OPCODE_SIG_X_V_L, OPCODE_SIG_X_V_L,
OPCODE_FLAG_BRANCH); OPCODE_FLAG_BRANCH)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ASSIGN, OPCODE_ASSIGN,
"assign", "assign",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CAST, OPCODE_CAST,
"cast", "cast",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ZERO_EXTEND, OPCODE_ZERO_EXTEND,
"zero_extend", "zero_extend",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SIGN_EXTEND, OPCODE_SIGN_EXTEND,
"sign_extend", "sign_extend",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_TRUNCATE, OPCODE_TRUNCATE,
"truncate", "truncate",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CONVERT, OPCODE_CONVERT,
"convert", "convert",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ROUND, OPCODE_ROUND,
"round", "round",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_I2F, OPCODE_VECTOR_CONVERT_I2F,
"vector_convert_i2f", "vector_convert_i2f",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_F2I, OPCODE_VECTOR_CONVERT_F2I,
"vector_convert_f2i", "vector_convert_f2i",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHL, OPCODE_LOAD_VECTOR_SHL,
"load_vector_shl", "load_vector_shl",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHR, OPCODE_LOAD_VECTOR_SHR,
"load_vector_shr", "load_vector_shr",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_CLOCK, OPCODE_LOAD_CLOCK,
"load_clock", "load_clock",
OPCODE_SIG_V, OPCODE_SIG_V,
0); 0)
DEFINE_OPCODE(
OPCODE_LOAD_LOCAL,
"load_local",
OPCODE_SIG_V_V,
0)
DEFINE_OPCODE(
OPCODE_STORE_LOCAL,
"store_local",
OPCODE_SIG_X_V_V,
0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_CONTEXT, OPCODE_LOAD_CONTEXT,
"load_context", "load_context",
OPCODE_SIG_V_O, OPCODE_SIG_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE_CONTEXT, OPCODE_STORE_CONTEXT,
"store_context", "store_context",
OPCODE_SIG_X_O_V, OPCODE_SIG_X_O_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD, OPCODE_LOAD,
"load", "load",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE, OPCODE_STORE,
"store", "store",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PREFETCH, OPCODE_PREFETCH,
"prefetch", "prefetch",
OPCODE_SIG_X_V_O, OPCODE_SIG_X_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MAX, OPCODE_MAX,
"max", "max",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MIN, OPCODE_MIN,
"min", "min",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SELECT, OPCODE_SELECT,
"select", "select",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_IS_TRUE, OPCODE_IS_TRUE,
"is_true", "is_true",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_IS_FALSE, OPCODE_IS_FALSE,
"is_false", "is_false",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_EQ, OPCODE_COMPARE_EQ,
"compare_eq", "compare_eq",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_NE, OPCODE_COMPARE_NE,
"compare_ne", "compare_ne",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SLT, OPCODE_COMPARE_SLT,
"compare_slt", "compare_slt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SLE, OPCODE_COMPARE_SLE,
"compare_sle", "compare_sle",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SGT, OPCODE_COMPARE_SGT,
"compare_sgt", "compare_sgt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_SGE, OPCODE_COMPARE_SGE,
"compare_sge", "compare_sge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_ULT, OPCODE_COMPARE_ULT,
"compare_ult", "compare_ult",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_ULE, OPCODE_COMPARE_ULE,
"compare_ule", "compare_ule",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_UGT, OPCODE_COMPARE_UGT,
"compare_ugt", "compare_ugt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_UGE, OPCODE_COMPARE_UGE,
"compare_uge", "compare_uge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE(
    OPCODE_DID_CARRY,
    "did_carry",
    OPCODE_SIG_V_V,
-    0);
+    OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE(
    OPCODE_DID_OVERFLOW,
    "did_overflow",
    OPCODE_SIG_V_V,
-    0);
+    OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE(
    OPCODE_DID_SATURATE,
    "did_saturate",
    OPCODE_SIG_V_V,
-    0);
+    OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_EQ, OPCODE_VECTOR_COMPARE_EQ,
"vector_compare_eq", "vector_compare_eq",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_SGT, OPCODE_VECTOR_COMPARE_SGT,
"vector_compare_sgt", "vector_compare_sgt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_SGE, OPCODE_VECTOR_COMPARE_SGE,
"vector_compare_sge", "vector_compare_sge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_UGT, OPCODE_VECTOR_COMPARE_UGT,
"vector_compare_ugt", "vector_compare_ugt",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_UGE, OPCODE_VECTOR_COMPARE_UGE,
"vector_compare_uge", "vector_compare_uge",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ADD, OPCODE_ADD,
"add", "add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE(
    OPCODE_ADD_CARRY,
    "add_carry",
    OPCODE_SIG_V_V_V_V,
-    OPCODE_FLAG_COMMUNATIVE);
+    0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_ADD, OPCODE_VECTOR_ADD,
"vector_add", "vector_add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SUB, OPCODE_SUB,
"sub", "sub",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL, OPCODE_MUL,
"mul", "mul",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_HI, OPCODE_MUL_HI,
"mul_hi", "mul_hi",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DIV, OPCODE_DIV,
"div", "div",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_ADD, OPCODE_MUL_ADD,
"mul_add", "mul_add",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_SUB, OPCODE_MUL_SUB,
"mul_sub", "mul_sub",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NEG, OPCODE_NEG,
"neg", "neg",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ABS, OPCODE_ABS,
"abs", "abs",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SQRT, OPCODE_SQRT,
"sqrt", "sqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RSQRT, OPCODE_RSQRT,
"rsqrt", "rsqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_POW2, OPCODE_POW2,
"pow2", "pow2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOG2, OPCODE_LOG2,
"log2", "log2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_3, OPCODE_DOT_PRODUCT_3,
"dot_product_3", "dot_product_3",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_4, OPCODE_DOT_PRODUCT_4,
"dot_product_4", "dot_product_4",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_AND, OPCODE_AND,
"and", "and",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_OR, OPCODE_OR,
"or", "or",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_XOR, OPCODE_XOR,
"xor", "xor",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NOT, OPCODE_NOT,
"not", "not",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHL, OPCODE_SHL,
"shl", "shl",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHL, OPCODE_VECTOR_SHL,
"vector_shl", "vector_shl",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHR, OPCODE_SHR,
"shr", "shr",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHR, OPCODE_VECTOR_SHR,
"vector_shr", "vector_shr",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SHA, OPCODE_SHA,
"sha", "sha",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_SHA, OPCODE_VECTOR_SHA,
"vector_sha", "vector_sha",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ROTATE_LEFT, OPCODE_ROTATE_LEFT,
"rotate_left", "rotate_left",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BYTE_SWAP, OPCODE_BYTE_SWAP,
"byte_swap", "byte_swap",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_CNTLZ, OPCODE_CNTLZ,
"cntlz", "cntlz",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_INSERT, OPCODE_INSERT,
"insert", "insert",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_EXTRACT, OPCODE_EXTRACT,
"extract", "extract",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SPLAT, OPCODE_SPLAT,
"splat", "splat",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PERMUTE, OPCODE_PERMUTE,
"permute", "permute",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_SWIZZLE, OPCODE_SWIZZLE,
"swizzle", "swizzle",
OPCODE_SIG_V_V_O, OPCODE_SIG_V_V_O,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PACK, OPCODE_PACK,
"pack", "pack",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_UNPACK, OPCODE_UNPACK,
"unpack", "unpack",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_EXCHANGE, OPCODE_COMPARE_EXCHANGE,
"compare_exchange", "compare_exchange",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_EXCHANGE, OPCODE_ATOMIC_EXCHANGE,
"atomic_exchange", "atomic_exchange",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_ADD, OPCODE_ATOMIC_ADD,
"atomic_add", "atomic_add",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_SUB, OPCODE_ATOMIC_SUB,
"atomic_sub", "atomic_sub",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0); 0)

View File

@ -1,6 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
  'sources': [
+    'block.cc',
    'block.h',
    'hir_builder.cc',
    'hir_builder.h',

View File

@ -187,19 +187,26 @@ void Value::Round(RoundMode round_mode) {
  XEASSERTALWAYS();
}
-void Value::Add(Value* other) {
+bool Value::Add(Value* other) {
+#define CHECK_DID_CARRY(v1, v2) (((uint64_t)v2) > ~((uint64_t)v1))
+#define ADD_DID_CARRY(a, b) CHECK_DID_CARRY(a, b)
  XEASSERT(type == other->type);
+  bool did_carry = false;
  switch (type) {
  case INT8_TYPE:
+    did_carry = ADD_DID_CARRY(constant.i8, other->constant.i8);
    constant.i8 += other->constant.i8;
    break;
  case INT16_TYPE:
+    did_carry = ADD_DID_CARRY(constant.i16, other->constant.i16);
    constant.i16 += other->constant.i16;
    break;
  case INT32_TYPE:
+    did_carry = ADD_DID_CARRY(constant.i32, other->constant.i32);
    constant.i32 += other->constant.i32;
    break;
  case INT64_TYPE:
+    did_carry = ADD_DID_CARRY(constant.i64, other->constant.i64);
    constant.i64 += other->constant.i64;
    break;
  case FLOAT32_TYPE:
@ -212,21 +219,28 @@ void Value::Add(Value* other) {
    XEASSERTALWAYS();
    break;
  }
+  return did_carry;
}
-void Value::Sub(Value* other) {
+bool Value::Sub(Value* other) {
+#define SUB_DID_CARRY(a, b) (b > a)
  XEASSERT(type == other->type);
+  bool did_carry = false;
  switch (type) {
  case INT8_TYPE:
+    did_carry = SUB_DID_CARRY(constant.i8, other->constant.i8);
    constant.i8 -= other->constant.i8;
    break;
  case INT16_TYPE:
+    did_carry = SUB_DID_CARRY(constant.i16, other->constant.i16);
    constant.i16 -= other->constant.i16;
    break;
  case INT32_TYPE:
+    did_carry = SUB_DID_CARRY(constant.i32, other->constant.i32);
    constant.i32 -= other->constant.i32;
    break;
  case INT64_TYPE:
+    did_carry = SUB_DID_CARRY(constant.i64, other->constant.i64);
    constant.i64 -= other->constant.i64;
    break;
  case FLOAT32_TYPE:
@ -239,6 +253,7 @@ void Value::Sub(Value* other) {
    XEASSERTALWAYS();
    break;
  }
+  return did_carry;
}
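Why CHECK_DID_CARRY works: b > ~a is b > (UINT64_MAX - a) in unsigned arithmetic, i.e. the sum a + b wraps. With 8-bit operands for illustration (the casts sign-extend, so ~a folds to the narrow complement):

    // a = 0xF0, b = 0x20:  ~a = 0x0F, 0x20 > 0x0F  -> carry out (0xF0+0x20 = 0x110)
    // a = 0xF0, b = 0x0F:  0x0F > 0x0F is false    -> no carry  (0xF0+0x0F = 0xFF)

For subtraction the test is simply b > a: the difference would dip below zero, i.e. a borrow occurs.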
void Value::Mul(Value* other) {
@ -560,6 +575,26 @@ void Value::ByteSwap() {
  }
}
void Value::CountLeadingZeros(const Value* other) {
switch (other->type) {
case INT8_TYPE:
constant.i8 = static_cast<uint8_t>(__lzcnt16(other->constant.i8) - 8);
break;
case INT16_TYPE:
constant.i8 = static_cast<uint8_t>(__lzcnt16(other->constant.i16));
break;
case INT32_TYPE:
constant.i8 = static_cast<uint8_t>(__lzcnt(other->constant.i32));
break;
case INT64_TYPE:
constant.i8 = static_cast<uint8_t>(__lzcnt64(other->constant.i64));
break;
default:
XEASSERTALWAYS();
break;
}
}
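The __lzcnt16/__lzcnt/__lzcnt64 intrinsics used here are MSVC-specific, and on CPUs without the LZCNT instruction they quietly execute with BSR semantics (the AVX2/Haswell requirement makes that a non-issue in practice); the INT8 case borrows the 16-bit intrinsic and subtracts the 8 high bits it over-counts. A portable fallback, if one were ever needed (sketch):

    // Portable count-leading-zeros for 32 bits (sketch).
    uint8_t Clz32(uint32_t v) {
      if (!v) return 32;
      uint8_t n = 0;
      while (!(v & 0x80000000u)) {  // shift until the top bit is set
        v <<= 1;
        ++n;
      }
      return n;
    }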
bool Value::Compare(Opcode opcode, Value* other) {
  // TODO(benvanik): big matrix.
  XEASSERTALWAYS();

View File

@ -11,6 +11,7 @@
#define ALLOY_HIR_VALUE_H_
#include <alloy/core.h>
+#include <alloy/backend/machine_info.h>
#include <alloy/hir/opcodes.h>
@ -34,7 +35,32 @@ enum TypeName {
};
static bool IsIntType(TypeName type_name) {
-  return type_name < 4;
+  return type_name <= INT64_TYPE;
}
static bool IsFloatType(TypeName type_name) {
return type_name == FLOAT32_TYPE || type_name == FLOAT64_TYPE;
}
static bool IsVecType(TypeName type_name) {
return type_name == VEC128_TYPE;
}
static size_t GetTypeSize(TypeName type_name) {
switch (type_name) {
case INT8_TYPE:
return 1;
case INT16_TYPE:
return 2;
case INT32_TYPE:
return 4;
case INT64_TYPE:
return 8;
case FLOAT32_TYPE:
return 4;
case FLOAT64_TYPE:
return 8;
default:
case VEC128_TYPE:
return 16;
}
  }
}
enum ValueFlags {
@ -42,6 +68,10 @@ enum ValueFlags {
  VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set.
};
struct RegAssignment {
const backend::MachineInfo::RegisterSet* set;
int32_t index;
};
class Value {
public:
@ -65,13 +95,14 @@ public:
  TypeName type;
  uint32_t flags;
-  uint32_t reg;
+  RegAssignment reg;
  ConstantValue constant;
  Instr* def;
  Use* use_head;
  // NOTE: for performance reasons this is not maintained during construction.
  Instr* last_use;
+  Value* local_slot;
  // TODO(benvanik): remove to shrink size.
  void* tag;
@ -158,25 +189,26 @@ public:
  }
  bool IsConstantTrue() const {
    if (type == VEC128_TYPE) {
-      return false;
+      XEASSERTALWAYS();
    }
    return (flags & VALUE_IS_CONSTANT) && !!constant.i64;
  }
  bool IsConstantFalse() const {
    if (type == VEC128_TYPE) {
-      return false;
+      XEASSERTALWAYS();
    }
    return (flags & VALUE_IS_CONSTANT) && !constant.i64;
  }
  bool IsConstantZero() const {
    if (type == VEC128_TYPE) {
-      return false;
+      return (flags & VALUE_IS_CONSTANT) &&
+             !constant.v128.low && !constant.v128.high;
    }
    return (flags & VALUE_IS_CONSTANT) && !constant.i64;
  }
  bool IsConstantEQ(Value* other) const {
    if (type == VEC128_TYPE) {
-      return false;
+      XEASSERTALWAYS();
    }
    return (flags & VALUE_IS_CONSTANT) &&
           (other->flags & VALUE_IS_CONSTANT) &&
@ -184,12 +216,156 @@ public:
  }
  bool IsConstantNE(Value* other) const {
    if (type == VEC128_TYPE) {
-      return false;
+      XEASSERTALWAYS();
    }
    return (flags & VALUE_IS_CONSTANT) &&
           (other->flags & VALUE_IS_CONSTANT) &&
           constant.i64 != other->constant.i64;
  }
bool IsConstantSLT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 < other->constant.i8;
case INT16_TYPE:
return constant.i16 < other->constant.i16;
case INT32_TYPE:
return constant.i32 < other->constant.i32;
case INT64_TYPE:
return constant.i64 < other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 < other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 < other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSLE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 <= other->constant.i8;
case INT16_TYPE:
return constant.i16 <= other->constant.i16;
case INT32_TYPE:
return constant.i32 <= other->constant.i32;
case INT64_TYPE:
return constant.i64 <= other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 <= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 <= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSGT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 > other->constant.i8;
case INT16_TYPE:
return constant.i16 > other->constant.i16;
case INT32_TYPE:
return constant.i32 > other->constant.i32;
case INT64_TYPE:
return constant.i64 > other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 > other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 > other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSGE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 >= other->constant.i8;
case INT16_TYPE:
return constant.i16 >= other->constant.i16;
case INT32_TYPE:
return constant.i32 >= other->constant.i32;
case INT64_TYPE:
return constant.i64 >= other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 >= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 >= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantULT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 < (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 < (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 < (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 < (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 < other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 < other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantULE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 <= (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 <= (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 <= (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 <= (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 <= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 <= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantUGT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 > (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 > (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 > (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 > (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 > other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 > other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantUGE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 >= (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 >= (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 >= (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 >= (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 >= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 >= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
  uint32_t AsUint32();
  uint64_t AsUint64();
@ -199,8 +375,8 @@ public:
  void Truncate(TypeName target_type);
  void Convert(TypeName target_type, RoundMode round_mode);
  void Round(RoundMode round_mode);
-  void Add(Value* other);
-  void Sub(Value* other);
+  bool Add(Value* other);
+  bool Sub(Value* other);
  void Mul(Value* other);
  void Div(Value* other);
  static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
@ -217,6 +393,7 @@ public:
  void Shr(Value* other);
  void Sha(Value* other);
  void ByteSwap();
+  void CountLeadingZeros(const Value* other);
  bool Compare(Opcode opcode, Value* other);
};

View File

@ -9,14 +9,22 @@
#include <alloy/memory.h>
+#if !XE_LIKE_WIN32
+#include <unistd.h>
+#endif
using namespace alloy;
Memory::Memory() :
-    membase_(0) {
+    membase_(0), reserve_address_(0) {
+#if XE_LIKE_WIN32
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  system_page_size_ = si.dwPageSize;
+#else
+  system_page_size_ = getpagesize();
+#endif
}
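getpagesize() is the BSD/POSIX counterpart of GetSystemInfo's dwPageSize. On stricter POSIX systems sysconf is the sanctioned spelling, so an equivalent #else branch might read (sketch):

    #include <unistd.h>
    system_page_size_ = static_cast<size_t>(sysconf(_SC_PAGESIZE));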
Memory::~Memory() {

View File

@ -34,6 +34,8 @@ public:
  };
  inline uint32_t* reserve_address() { return &reserve_address_; }
+  virtual uint64_t page_table() const = 0;
  virtual int Initialize();
  void Zero(uint64_t address, size_t size);
@ -43,6 +45,15 @@ public:
  uint64_t SearchAligned(uint64_t start, uint64_t end,
                         const uint32_t* values, size_t value_count);
virtual uint8_t LoadI8(uint64_t address) = 0;
virtual uint16_t LoadI16(uint64_t address) = 0;
virtual uint32_t LoadI32(uint64_t address) = 0;
virtual uint64_t LoadI64(uint64_t address) = 0;
virtual void StoreI8(uint64_t address, uint8_t value) = 0;
virtual void StoreI16(uint64_t address, uint16_t value) = 0;
virtual void StoreI32(uint64_t address, uint32_t value) = 0;
virtual void StoreI64(uint64_t address, uint64_t value) = 0;
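These pure-virtual accessors let runtime code peek and poke guest memory without knowing the concrete mapping. A trivial host-backed override might look like this (SimpleMemory is illustrative, not part of this diff):

    // Sketch of one accessor in a concrete subclass.
    class SimpleMemory : public alloy::Memory {
     public:
      virtual uint32_t LoadI32(uint64_t address) {
        uint32_t value;
        std::memcpy(&value, membase() + address, sizeof(value));
        return value;  // note: guest byte order is the caller's concern
      }
      // ... remaining LoadIN/StoreIN overrides elided ...
    };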
  virtual uint64_t HeapAlloc(
      uint64_t base_address, size_t size, uint32_t flags,
      uint32_t alignment = 0x20) = 0;

View File

@ -62,7 +62,7 @@ SourceMapEntry* DebugInfo::LookupHIROffset(uint64_t offset) {
SourceMapEntry* DebugInfo::LookupCodeOffset(uint64_t offset) {
  // TODO(benvanik): binary search? We know the list is sorted by code order.
-  for (int n = source_map_count_ - 1; n >= 0; n--) {
+  for (int64_t n = source_map_count_ - 1; n >= 0; n--) {
    auto entry = &source_map_[n];
    if (entry->code_offset <= offset) {
      return entry;

View File

@ -75,6 +75,8 @@ Entry::Status EntryTable::GetOrCreate(uint64_t address, Entry** out_entry) {
}
std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) {
+  SCOPE_profile_cpu_f("alloy");
  std::vector<Function*> fns;
  LockMutex(lock_);
  for (auto it = map_.begin(); it != map_.end(); ++it) {

View File

@ -47,7 +47,7 @@ public:
private:
  // TODO(benvanik): replace with a better data structure.
  Mutex* lock_;
-  typedef std::tr1::unordered_map<uint64_t, Entry*> EntryMap;
+  typedef std::unordered_map<uint64_t, Entry*> EntryMap;
  EntryMap map_;
};

View File

@ -17,8 +17,9 @@ using namespace alloy;
using namespace alloy::runtime;
-Function::Function(Type type, uint64_t address) :
-    type_(type), address_(address), debug_info_(0) {
+Function::Function(FunctionInfo* symbol_info) :
+    address_(symbol_info->address()),
+    symbol_info_(symbol_info), debug_info_(0) {
  // TODO(benvanik): create on demand?
  lock_ = AllocMutex();
}
@ -72,48 +73,34 @@ Breakpoint* Function::FindBreakpoint(uint64_t address) {
  return result;
}
-int Function::Call(ThreadState* thread_state) {
+int Function::Call(ThreadState* thread_state, uint64_t return_address) {
+  SCOPE_profile_cpu_f("alloy");
  ThreadState* original_thread_state = ThreadState::Get();
  if (original_thread_state != thread_state) {
    ThreadState::Bind(thread_state);
  }
-  int result = CallImpl(thread_state);
+
+  int result = 0;
+  if (symbol_info_->behavior() == FunctionInfo::BEHAVIOR_EXTERN) {
+    auto handler = symbol_info_->extern_handler();
+    if (handler) {
+      handler(thread_state->raw_context(),
+              symbol_info_->extern_arg0(),
+              symbol_info_->extern_arg1());
+    } else {
+      XELOGW("undefined extern call to %.8X %s",
+             symbol_info_->address(),
+             symbol_info_->name());
+      result = 1;
+    }
+  } else {
+    CallImpl(thread_state, return_address);
+  }
+
  if (original_thread_state != thread_state) {
    ThreadState::Bind(original_thread_state);
  }
  return result;
}
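Folding extern dispatch into Function::Call removes the ExternFunction virtual hop deleted below, and threading a guest return address through to CallImpl gives the new OPCODE_SET_RETURN_ADDRESS machinery something to consult. A call site now looks roughly like this (sketch; the +4 assumes a standard 4-byte PPC bl):

    // Invoke a resolved guest function from a bl at instruction i:
    fn->Call(thread_state, i.address + 4);  // return to the next instruction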
ExternFunction::ExternFunction(
uint64_t address, Handler handler, void* arg0, void* arg1) :
name_(0),
handler_(handler), arg0_(arg0), arg1_(arg1),
Function(Function::EXTERN_FUNCTION, address) {
}
ExternFunction::~ExternFunction() {
if (name_) {
xe_free(name_);
}
}
void ExternFunction::set_name(const char* name) {
name_ = xestrdupa(name);
}
int ExternFunction::CallImpl(ThreadState* thread_state) {
if (!handler_) {
XELOGW("undefined extern call to %.8X %s", address(), name());
return 0;
}
handler_(thread_state->raw_context(), arg0_, arg1_);
return 0;
}
GuestFunction::GuestFunction(FunctionInfo* symbol_info) :
symbol_info_(symbol_info),
Function(Function::USER_FUNCTION, symbol_info->address()) {
}
GuestFunction::~GuestFunction() {
}
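
The refactor folds ExternFunction's behavior into Function::Call itself: instead of a subclass per function kind, the FunctionInfo symbol carries a BEHAVIOR_EXTERN flag plus handler/args (see symbol_info.cc below), and Call dispatches on it. A simplified sketch of that dispatch shape, with illustrative stand-in types:

    // Behavior-based dispatch replacing the ExternFunction subclass;
    // Symbol and the surrounding names here are illustrative only.
    #include <cstdio>

    typedef void (*ExternHandler)(void* context, void* arg0, void* arg1);

    enum Behavior { BEHAVIOR_DEFAULT, BEHAVIOR_EXTERN };

    struct Symbol {
      Behavior behavior;
      ExternHandler handler;
      void* arg0;
      void* arg1;
    };

    int Call(Symbol* symbol, void* thread_context) {
      if (symbol->behavior == BEHAVIOR_EXTERN) {
        if (!symbol->handler) {
          fprintf(stderr, "undefined extern call\n");
          return 1;  // as in the diff: a missing handler is now an error
        }
        symbol->handler(thread_context, symbol->arg0, symbol->arg1);
        return 0;
      }
      // ...otherwise fall through to generated guest code (CallImpl).
      return 0;
    }

Note the diff also tightens semantics: the old ExternFunction::CallImpl returned 0 even when the handler was missing, whereas Call now reports failure (result = 1).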

View File: alloy/runtime/function.h

@@ -24,17 +24,11 @@ class ThreadState;
 class Function {
 public:
-  enum Type {
-    UNKNOWN_FUNCTION = 0,
-    EXTERN_FUNCTION,
-    USER_FUNCTION,
-  };
-
-public:
-  Function(Type type, uint64_t address);
+  Function(FunctionInfo* symbol_info);
   virtual ~Function();
 
-  Type type() const { return type_; }
   uint64_t address() const { return address_; }
+  FunctionInfo* symbol_info() const { return symbol_info_; }
   DebugInfo* debug_info() const { return debug_info_; }
   void set_debug_info(DebugInfo* debug_info) { debug_info_ = debug_info; }
@@ -42,17 +36,18 @@ public:
   int AddBreakpoint(Breakpoint* breakpoint);
   int RemoveBreakpoint(Breakpoint* breakpoint);
 
-  int Call(ThreadState* thread_state);
+  int Call(ThreadState* thread_state, uint64_t return_address);
 
 protected:
   Breakpoint* FindBreakpoint(uint64_t address);
   virtual int AddBreakpointImpl(Breakpoint* breakpoint) { return 0; }
   virtual int RemoveBreakpointImpl(Breakpoint* breakpoint) { return 0; }
-  virtual int CallImpl(ThreadState* thread_state) = 0;
+  virtual int CallImpl(ThreadState* thread_state,
+                       uint64_t return_address) = 0;
 
 protected:
-  Type type_;
   uint64_t address_;
+  FunctionInfo* symbol_info_;
   DebugInfo* debug_info_;
 
   // TODO(benvanik): move elsewhere? DebugData?
@@ -61,43 +56,6 @@ protected:
 };
 
-class ExternFunction : public Function {
-public:
-  typedef void(*Handler)(void* context, void* arg0, void* arg1);
-
-public:
-  ExternFunction(uint64_t address, Handler handler, void* arg0, void* arg1);
-  virtual ~ExternFunction();
-
-  const char* name() const { return name_; }
-  void set_name(const char* name);
-
-  Handler handler() const { return handler_; }
-  void* arg0() const { return arg0_; }
-  void* arg1() const { return arg1_; }
-
-protected:
-  virtual int CallImpl(ThreadState* thread_state);
-
-protected:
-  char* name_;
-  Handler handler_;
-  void* arg0_;
-  void* arg1_;
-};
-
-class GuestFunction : public Function {
-public:
-  GuestFunction(FunctionInfo* symbol_info);
-  virtual ~GuestFunction();
-
-  FunctionInfo* symbol_info() const { return symbol_info_; }
-
-protected:
-  FunctionInfo* symbol_info_;
-};
-
 } // namespace runtime
 } // namespace alloy
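
With the ExternFunction and GuestFunction subclasses gone from this header, backends presumably derive from Function directly, and every subclass now receives the guest return address through CallImpl. A sketch of what a backend-side subclass might look like (hypothetical class, assuming the alloy headers above are included):

    // Hypothetical backend function type showing the new CallImpl
    // signature with an explicit guest return address.
    class MyBackendFunction : public alloy::runtime::Function {
    public:
      explicit MyBackendFunction(alloy::runtime::FunctionInfo* symbol_info)
          : Function(symbol_info) {}

    protected:
      virtual int CallImpl(alloy::runtime::ThreadState* thread_state,
                           uint64_t return_address) {
        // A real backend would enter generated host code here, passing
        // return_address so the emitted epilog knows where to resume.
        return 0;
      }
    };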

View File: alloy/runtime/module.cc

@@ -161,6 +161,8 @@ SymbolInfo::Status Module::DefineVariable(VariableInfo* symbol_info) {
 }
 
 void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
+  SCOPE_profile_cpu_f("alloy");
+
   LockMutex(lock_);
   for (auto it = list_.begin(); it != list_.end(); ++it) {
     SymbolInfo* symbol_info = *it;
@@ -174,6 +176,8 @@ void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
 
 void Module::ForEachFunction(size_t since, size_t& version,
                              std::function<void (FunctionInfo*)> callback) {
+  SCOPE_profile_cpu_f("alloy");
+
   LockMutex(lock_);
   size_t count = list_.size();
   version = count;

View File: alloy/runtime/module.h

@@ -62,7 +62,7 @@ protected:
 private:
   // TODO(benvanik): replace with a better data structure.
   Mutex* lock_;
-  typedef std::tr1::unordered_map<uint64_t, SymbolInfo*> SymbolMap;
+  typedef std::unordered_map<uint64_t, SymbolInfo*> SymbolMap;
   SymbolMap map_;
   typedef std::vector<SymbolInfo*> SymbolList;
   SymbolList list_;

View File: alloy/runtime/register_access.h (deleted in this commit)

@ -1,38 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef ALLOY_RUNTIME_REGISTER_ACCESS_H_
#define ALLOY_RUNTIME_REGISTER_ACCESS_H_
#include <alloy/core.h>
namespace alloy {
namespace runtime {
typedef bool (*RegisterHandlesCallback)(void* context, uint64_t addr);
typedef uint64_t (*RegisterReadCallback)(void* context, uint64_t addr);
typedef void (*RegisterWriteCallback)(void* context, uint64_t addr,
uint64_t value);
typedef struct RegisterAccessCallbacks_s {
void* context;
RegisterHandlesCallback handles;
RegisterReadCallback read;
RegisterWriteCallback write;
RegisterAccessCallbacks_s* next;
} RegisterAccessCallbacks;
} // namespace runtime
} // namespace alloy
#endif // ALLOY_RUNTIME_REGISTER_ACCESS_H_
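
This header defined an MMIO-style chain of register-access callbacks that the runtime could walk on guest loads/stores to special addresses; the commit removes it along with Runtime::AddRegisterAccessCallbacks below, so register intercepts presumably move to another layer. For reference, dispatching a read through such a chain looks like this (a sketch, not the removed code):

    // Walking a linked chain of register-access callbacks like the
    // removed RegisterAccessCallbacks list; ram_value is a stand-in
    // for the plain-memory fallback path.
    #include <cstdint>

    typedef bool (*RegisterHandlesCallback)(void* context, uint64_t addr);
    typedef uint64_t (*RegisterReadCallback)(void* context, uint64_t addr);

    struct Callbacks {
      void* context;
      RegisterHandlesCallback handles;
      RegisterReadCallback read;
      Callbacks* next;
    };

    uint64_t ReadRegister(Callbacks* head, uint64_t addr, uint64_t ram_value) {
      // First handler that claims the address wins; otherwise fall back
      // to ordinary memory.
      for (Callbacks* cbs = head; cbs; cbs = cbs->next) {
        if (cbs->handles(cbs->context, addr)) {
          return cbs->read(cbs->context, addr);
        }
      }
      return ram_value;
    }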

View File: alloy/runtime/runtime.cc

@@ -25,8 +25,7 @@ DEFINE_string(runtime_backend, "any",
 
 Runtime::Runtime(Memory* memory) :
-    memory_(memory), debugger_(0), backend_(0), frontend_(0),
-    access_callbacks_(0) {
+    memory_(memory), debugger_(0), backend_(0), frontend_(0) {
   tracing::Initialize();
   modules_lock_ = AllocMutex(10000);
 }
@@ -41,14 +40,6 @@ Runtime::~Runtime() {
   UnlockMutex(modules_lock_);
   FreeMutex(modules_lock_);
 
-  RegisterAccessCallbacks* cbs = access_callbacks_;
-  while (cbs) {
-    RegisterAccessCallbacks* next = cbs->next;
-    delete cbs;
-    cbs = next;
-  }
-  access_callbacks_ = NULL;
-
   delete frontend_;
   delete backend_;
   delete debugger_;
@@ -64,11 +55,6 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
   // Must be initialized by subclass before calling into this.
   XEASSERTNOTNULL(memory_);
 
-  int result = memory_->Initialize();
-  if (result) {
-    return result;
-  }
-
   // Create debugger first. Other types hook up to it.
   debugger_ = new Debugger(this);
@@ -91,10 +77,10 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
 #endif  // ALLOY_HAS_IVM_BACKEND
   if (FLAGS_runtime_backend == "any") {
 #if defined(ALLOY_HAS_X64_BACKEND) && ALLOY_HAS_X64_BACKEND
-    /*if (!backend) {
+    if (!backend) {
       backend = new alloy::backend::x64::X64Backend(
           this);
-    }*/
+    }
 #endif  // ALLOY_HAS_X64_BACKEND
 #if defined(ALLOY_HAS_IVM_BACKEND) && ALLOY_HAS_IVM_BACKEND
     if (!backend) {
@@ -111,7 +97,7 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
   backend_ = backend;
   frontend_ = frontend;
 
-  result = backend_->Initialize();
+  int result = backend_->Initialize();
   if (result) {
     return result;
   }
@@ -159,6 +145,8 @@ std::vector<Function*> Runtime::FindFunctionsWithAddress(uint64_t address) {
 }
 
 int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_function = NULL;
   Entry* entry;
   Entry::Status status = entry_table_.GetOrCreate(address, &entry);
@@ -192,6 +180,8 @@ int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
 
 int Runtime::LookupFunctionInfo(
     uint64_t address, FunctionInfo** out_symbol_info) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_symbol_info = NULL;
 
   // TODO(benvanik): fast reject invalid addresses/log errors.
@@ -220,6 +210,8 @@ int Runtime::LookupFunctionInfo(
 
 int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
                                 FunctionInfo** out_symbol_info) {
+  SCOPE_profile_cpu_f("alloy");
+
   // Atomic create/lookup symbol in module.
   // If we get back the NEW flag we must declare it now.
   FunctionInfo* symbol_info = NULL;
@@ -241,6 +233,8 @@ int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
 
 int Runtime::DemandFunction(
     FunctionInfo* symbol_info, Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_function = NULL;
 
   // Lock function for generation. If it's already being generated
@@ -273,11 +267,3 @@ int Runtime::DemandFunction(
 
   return 0;
 }
-
-void Runtime::AddRegisterAccessCallbacks(
-    const RegisterAccessCallbacks& callbacks) {
-  RegisterAccessCallbacks* cbs = new RegisterAccessCallbacks();
-  xe_copy_struct(cbs, &callbacks, sizeof(callbacks));
-  cbs->next = access_callbacks_;
-  access_callbacks_ = cbs;
-}
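
Taken together, the runtime.cc changes shift memory initialization to the Runtime's owner (Initialize no longer calls memory_->Initialize() itself), re-enable the x64 backend under the "any" backend flag with the IVM as fallback, and profile the hot lookup paths. The implied call-side contract, sketched under those assumptions (XenonRuntime is a placeholder for whatever Runtime subclass is in use):

    // Hedged sketch of the post-change setup order; constructor arguments
    // and error handling are illustrative.
    int SetupRuntime(Memory* memory, alloy::frontend::Frontend* frontend) {
      // The owner initializes memory first now.
      int result = memory->Initialize();
      if (result) {
        return result;
      }
      auto runtime = new XenonRuntime(memory);  // hypothetical subclass
      // Omitting the backend lets FLAGS_runtime_backend choose; with
      // "any", the x64 backend is eligible again.
      return runtime->Initialize(frontend);
    }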

View File: alloy/runtime/runtime.h

@@ -17,7 +17,6 @@
 #include <alloy/runtime/debugger.h>
 #include <alloy/runtime/entry_table.h>
 #include <alloy/runtime/module.h>
-#include <alloy/runtime/register_access.h>
 #include <alloy/runtime/symbol_info.h>
 #include <alloy/runtime/thread_state.h>
@@ -38,9 +37,6 @@ public:
   Debugger* debugger() const { return debugger_; }
   frontend::Frontend* frontend() const { return frontend_; }
   backend::Backend* backend() const { return backend_; }
-  RegisterAccessCallbacks* access_callbacks() const {
-    return access_callbacks_;
-  }
 
   int Initialize(frontend::Frontend* frontend, backend::Backend* backend = 0);
@@ -55,9 +51,6 @@ public:
                          FunctionInfo** out_symbol_info);
   int ResolveFunction(uint64_t address, Function** out_function);
 
-  void AddRegisterAccessCallbacks(
-      const RegisterAccessCallbacks& callbacks);
-
   //uint32_t CreateCallback(void (*callback)(void* data), void* data);
 
 private:
@@ -74,8 +67,6 @@ protected:
   EntryTable entry_table_;
   Mutex* modules_lock_;
   ModuleList modules_;
-
-  RegisterAccessCallbacks* access_callbacks_;
 };

View File: alloy/runtime/sources.gypi

@ -15,7 +15,6 @@
'module.h', 'module.h',
'raw_module.cc', 'raw_module.cc',
'raw_module.h', 'raw_module.h',
'register_access.h',
'runtime.cc', 'runtime.cc',
'runtime.h', 'runtime.h',
'symbol_info.cc', 'symbol_info.cc',

View File: alloy/runtime/symbol_info.cc

@@ -34,11 +34,19 @@ void SymbolInfo::set_name(const char* name) {
 FunctionInfo::FunctionInfo(Module* module, uint64_t address) :
     end_address_(0), behavior_(BEHAVIOR_DEFAULT), function_(0),
     SymbolInfo(SymbolInfo::TYPE_FUNCTION, module, address) {
+  xe_zero_struct(&extern_info_, sizeof(extern_info_));
 }
 
 FunctionInfo::~FunctionInfo() {
 }
 
+void FunctionInfo::SetupExtern(ExternHandler handler, void* arg0, void* arg1) {
+  behavior_ = BEHAVIOR_EXTERN;
+  extern_info_.handler = handler;
+  extern_info_.arg0 = arg0;
+  extern_info_.arg1 = arg1;
+}
+
 VariableInfo::VariableInfo(Module* module, uint64_t address) :
     SymbolInfo(SymbolInfo::TYPE_VARIABLE, module, address) {
 }
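
Registering an extern now happens on the symbol rather than via an ExternFunction object. A module wiring a host-implemented routine to a guest address would presumably do something along these lines (the handler name and success convention are illustrative):

    // Hypothetical registration through the new FunctionInfo::SetupExtern
    // path; assumes LookupFunctionInfo returns 0 on success.
    void MyExternThunk(void* raw_context, void* arg0, void* arg1) {
      // Host-side implementation of the guest routine.
    }

    void RegisterExtern(alloy::runtime::Runtime* runtime,
                        uint64_t guest_address, void* arg0, void* arg1) {
      alloy::runtime::FunctionInfo* info = 0;
      // Declare/look up the symbol, then flag it as extern; Function::Call
      // will route through the handler instead of generated code.
      if (!runtime->LookupFunctionInfo(guest_address, &info) && info) {
        info->SetupExtern(MyExternThunk, arg0, arg1);
      }
    }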

View File: alloy/runtime/symbol_info.h

@@ -63,6 +63,7 @@ public:
     BEHAVIOR_PROLOG,
     BEHAVIOR_EPILOG,
     BEHAVIOR_EPILOG_RETURN,
+    BEHAVIOR_EXTERN,
   };
 
 public:
@@ -79,10 +80,21 @@ public:
   Function* function() const { return function_; }
   void set_function(Function* value) { function_ = value; }
 
+  typedef void(*ExternHandler)(void* context, void* arg0, void* arg1);
+  void SetupExtern(ExternHandler handler, void* arg0, void* arg1);
+  ExternHandler extern_handler() const { return extern_info_.handler; }
+  void* extern_arg0() const { return extern_info_.arg0; }
+  void* extern_arg1() const { return extern_info_.arg1; }
+
 private:
   uint64_t end_address_;
   Behavior behavior_;
   Function* function_;
+  struct {
+    ExternHandler handler;
+    void* arg0;
+    void* arg1;
+  } extern_info_;
 };
 
 class VariableInfo : public SymbolInfo {

View File: alloy/runtime/thread_state.cc

@@ -64,6 +64,5 @@ ThreadState* ThreadState::Get() {
 }
 
 uint32_t ThreadState::GetThreadID() {
-  XEASSERT(thread_state_);
   return thread_state_->thread_id_;
 }

View File: alloy/runtime/tracing.h

@@ -40,46 +40,46 @@ public:
     ALLOY_RUNTIME_MEMORY_HEAP_FREE = ALLOY_RUNTIME_MEMORY | (4),
   };
 
-  typedef struct {
+  typedef struct Init_s {
     static const uint32_t event_type = ALLOY_RUNTIME_INIT;
   } Init;
 
-  typedef struct {
+  typedef struct Deinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_DEINIT;
   } Deinit;
 
-  typedef struct {
+  typedef struct ThreadInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_THREAD_INIT;
   } ThreadInit;
 
-  typedef struct {
+  typedef struct ThreadDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_THREAD_DEINIT;
   } ThreadDeinit;
 
-  typedef struct {
+  typedef struct MemoryInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_INIT;
     // map of memory, etc?
   } MemoryInit;
 
-  typedef struct {
+  typedef struct MemoryDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_DEINIT;
   } MemoryDeinit;
 
-  typedef struct {
+  typedef struct MemoryHeapInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_INIT;
     uint32_t heap_id;
     uint64_t low_address;
     uint64_t high_address;
     uint32_t is_physical;
   } MemoryHeapInit;
 
-  typedef struct {
+  typedef struct MemoryHeapDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_DEINIT;
     uint32_t heap_id;
   } MemoryHeapDeinit;
 
-  typedef struct {
+  typedef struct MemoryHeapAlloc_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_ALLOC;
     uint32_t heap_id;
     uint32_t flags;
     uint64_t address;
     size_t size;
  } MemoryHeapAlloc;
 
-  typedef struct {
+  typedef struct MemoryHeapFree_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_FREE;
     uint32_t heap_id;
     uint64_t address;
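
The only change in this hunk is giving each event struct a tag name (Init_s, Deinit_s, ...). Likely motivations, hedged since the commit does not say: typedef'd unnamed structs cannot be forward-declared, they surface as unhelpful synthetic names in debuggers and trace tooling, and pre-C++11 toolchains had linkage pitfalls when such types were used as template arguments. The consuming pattern these events presumably feed, keyed on the static event_type member:

    // WriteEvent here is a stand-in for the real tracing sink; the point
    // is that each event type carries a static id a template can read.
    #include <cstdint>
    #include <cstdio>

    template <typename T>
    void WriteEvent(const T& ev) {
      printf("event %u, %zu bytes\n", T::event_type, sizeof(T));
    }

    typedef struct Init_s {
      static const uint32_t event_type = 1;
    } Init;

    int main() {
      WriteEvent(Init());  // a named tag also permits forward declarations
      return 0;
    }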

Some files were not shown because too many files have changed in this diff.