commit 66dc31a01c

 README.md | 19
@@ -1,8 +1,7 @@
 Xenia - Xbox 360 Emulator Research Project
 ==========================================
 
-Xenia is an experimental emulator for the Xbox 360. It does not run games (yet),
-and if you are unable to understand that please leave now.
+Xenia is an experimental emulator for the Xbox 360. It does not run games (yet).
 
 Pull requests are welcome but the code is in a very high churn state and may not
 be accepted, so ask in IRC before taking on anything big. Contributions are
@@ -54,7 +53,7 @@ See [building](docs/building.md) for setup and information about the
 
 Have some spare time, know advanced C++, and want to write an emulator?
 Contribute! There's a ton of work that needs to be done, a lot of which
-is wide open greenfield fun.
+is wide open greenfield fun.
 
 That said, the project is currently undergoing a lot of major foundational
 development and core pieces are changing rapidly and poorly documented.
@@ -64,12 +63,12 @@ doing.
 Fixes and optimizations are always welcome (please!), but in addition to
 that there are some major work areas still untouched:
 
 * Help work through missing functionality/bugs in game [compat](https://github.com/benvanik/xenia/issues?labels=compat)
 * Write an [OpenGL driver](https://github.com/benvanik/xenia/issues/59)
 * Add input drivers for [OSX](https://github.com/benvanik/xenia/issues/61) and [PS4 controllers](https://github.com/benvanik/xenia/issues/60) (or anything else)
 * Start [hacking on audio](https://github.com/benvanik/xenia/issues/62)
 * Support [loading of PIRS files](https://github.com/benvanik/xenia/issues/63)
 * Build a [virtual LIVE service](https://github.com/benvanik/xenia/issues/64)
 
 See more projects [good for contributors](https://github.com/benvanik/xenia/issues?labels=good+for+contributors&page=1&state=open). It's a good idea to ask on IRC/the bugs before beginning work
 on something.
@@ -85,11 +84,9 @@ Come on people. Jeez.
 
 ### What kind of machine do I need to run this?
 
-You'll need 64-bit Windows 7 with a processor supporting at least SSE4.
-It's only tested on Windows 8 and that may become a requirement as several of
-the APIs exposed there are beneficial to emulation. In general if you have to
-ask if your machine is good enough to run games at a decent speed the answer is
-no.
+You'll need 64-bit Windows 8 with a processor supporting at least AVX2 - in
+other words, a Haswell. In general if you have to ask if your machine is good
+enough to run games at a decent speed the answer is no.
 
 ### What about Linux/OSX?
 
@@ -108,7 +105,7 @@ be required in the future.
 
 I get asked this about once a day. Yes, I have heard of them. In fact, I spent
 a long time trying them out:
-[LLVM](https://github.com/benvanik/xenia/tree/85bdbd24d1b5923cfb104f45194a96e7ac57026e/src/xenia/cpu/codegen),
+[LLVM](https://github.com/benvanik/xenia/tree/85bdbd24d1b5923cfb104f45194a96e7ac57026e/src/xenia/cpu/codegen),
 [libjit](https://github.com/benvanik/xenia/tree/eee856be0499a4bc721b6097f5f2b9446929f2cc/src/xenia/cpu/libjit),
 [asmjit](https://github.com/benvanik/xenia/tree/ca208fa60a0285d396409743064784cc2320c094/src/xenia/cpu/x64).
 They don't work for this purpose. I understand if you disagree, but please
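As context for the hunk above, the minimum CPU moves from SSE4 to AVX2 (i.e. Haswell or newer). A host-side capability probe might look like the following sketch, assuming MSVC's __cpuidex intrinsic; AVX2 is reported in CPUID leaf 7, sub-leaf 0, EBX bit 5:

    #include <intrin.h>

    // Minimal AVX2 probe (sketch; a production check would also verify
    // OS XSAVE support via XGETBV before trusting the bit).
    bool HostSupportsAVX2() {
      int regs[4];  // EAX, EBX, ECX, EDX
      __cpuidex(regs, 7, 0);
      return (regs[1] & (1 << 5)) != 0;  // EBX bit 5 = AVX2
    }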
@@ -18,6 +18,11 @@
 DECLARE_bool(debug);
 DECLARE_bool(always_disasm);
 
+DECLARE_bool(validate_hir);
+
+DECLARE_uint64(break_on_instruction);
+DECLARE_uint64(break_on_memory);
+
 
 namespace alloy {
@@ -21,6 +21,14 @@ using namespace alloy;
 
 DEFINE_bool(debug, DEFAULT_DEBUG_FLAG,
     "Allow debugging and retain debug information.");
 DEFINE_bool(always_disasm, false,
     "Always add debug info to functions, even when no debugger is attached.");
+
+DEFINE_bool(validate_hir, false,
+    "Perform validation checks on the HIR during compilation.");
+// Breakpoints:
+DEFINE_uint64(break_on_instruction, 0,
+    "int3 before the given guest address is executed.");
+DEFINE_uint64(break_on_memory, 0,
+    "int3 on read/write to the given memory address.");
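These DECLARE_*/DEFINE_* pairs follow the usual gflags-style pattern: the DEFINE_ in one .cc creates the flag and its FLAGS_ global, and any file including the matching DECLARE_ can read it. A sketch of how the new validate_hir flag would presumably be consumed (the surrounding compiler function is an assumption, not part of this diff):

    DECLARE_bool(validate_hir);

    int CompileFunction(hir::HIRBuilder* builder) {
      if (FLAGS_validate_hir) {
        // Run a validation pass over the HIR before lowering it.
      }
      // ... continue compilation ...
      return 0;
    }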
@@ -18,6 +18,7 @@ using namespace alloy::runtime;
 
 Backend::Backend(Runtime* runtime) :
     runtime_(runtime) {
+  xe_zero_struct(&machine_info_, sizeof(machine_info_));
 }
 
 Backend::~Backend() {
@@ -11,6 +11,7 @@
 #define ALLOY_BACKEND_BACKEND_H_
 
 #include <alloy/core.h>
+#include <alloy/backend/machine_info.h>
 
 
 namespace alloy { namespace runtime { class Runtime; } }
@@ -27,6 +28,7 @@ public:
   virtual ~Backend();
 
   runtime::Runtime* runtime() const { return runtime_; }
+  const MachineInfo* machine_info() const { return &machine_info_; }
 
   virtual int Initialize();
@@ -37,6 +39,7 @@ public:
 
 protected:
   runtime::Runtime* runtime_;
+  MachineInfo machine_info_;
 };
@@ -61,7 +61,6 @@ int IVMAssembler::Assemble(
   fn->set_debug_info(debug_info);
 
   TranslationContext ctx;
-  ctx.access_callbacks = backend_->runtime()->access_callbacks();
   ctx.register_count = 0;
   ctx.intcode_count = 0;
   ctx.intcode_arena = &intcode_arena_;
@@ -74,6 +73,19 @@ int IVMAssembler::Assemble(
   builder->ResetLabelTags();
 
+  // Function prologue.
+  size_t stack_offset = 0;
+  auto locals = builder->locals();
+  for (auto it = locals.begin(); it != locals.end(); ++it) {
+    auto slot = *it;
+    size_t type_size = GetTypeSize(slot->type);
+    // Align to natural size.
+    stack_offset = XEALIGN(stack_offset, type_size);
+    slot->set_constant((uint32_t)stack_offset);
+    stack_offset += type_size;
+  }
+  // Ensure 16b alignment.
+  stack_offset = XEALIGN(stack_offset, 16);
+  ctx.stack_size = stack_offset;
+
   auto block = builder->first_block();
   while (block) {
@@ -96,7 +108,7 @@ int IVMAssembler::Assemble(
   // Fixup label references.
   LabelRef* label_ref = ctx.label_ref_head;
   while (label_ref) {
-    label_ref->instr->src1_reg = (uint32_t)label_ref->label->tag & ~0x80000000;
+    label_ref->instr->src1_reg = (uint32_t)(intptr_t)label_ref->label->tag & ~0x80000000;
     label_ref = label_ref->next;
   }
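The prologue loop relies on XEALIGN rounding an offset up to a power-of-two boundary; a plausible definition (an assumption - the macro body is not shown in this diff):

    #define XEALIGN(value, align) (((value) + ((align) - 1)) & ~((align) - 1))

With that, locals of size 4, 8, and 16 laid out in order land at offsets 0, 8, and 16, stack_offset ends at 32, and the final XEALIGN(stack_offset, 16) keeps the whole frame 16-byte aligned so v128 locals can be accessed safely.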
@@ -34,6 +34,20 @@ int IVMBackend::Initialize() {
     return result;
   }
 
+  machine_info_.register_sets[0] = {
+    0,
+    "gpr",
+    MachineInfo::RegisterSet::INT_TYPES,
+    16,
+  };
+  machine_info_.register_sets[1] = {
+    1,
+    "vec",
+    MachineInfo::RegisterSet::FLOAT_TYPES |
+    MachineInfo::RegisterSet::VEC_TYPES,
+    16,
+  };
+
   alloy::tracing::WriteEvent(EventType::Init({
   }));
@@ -23,7 +23,7 @@ using namespace alloy::runtime;
 IVMFunction::IVMFunction(FunctionInfo* symbol_info) :
     register_count_(0), intcode_count_(0), intcodes_(0),
     source_map_count_(0), source_map_(0),
-    GuestFunction(symbol_info) {
+    Function(symbol_info) {
 }
 
 IVMFunction::~IVMFunction() {
@@ -33,6 +33,7 @@ IVMFunction::~IVMFunction() {
 
 void IVMFunction::Setup(TranslationContext& ctx) {
   register_count_ = ctx.register_count;
+  stack_size_ = ctx.stack_size;
   intcode_count_ = ctx.intcode_count;
   intcodes_ = (IntCode*)ctx.intcode_arena->CloneContents();
   source_map_count_ = ctx.source_map_count;
@@ -104,22 +105,25 @@ void IVMFunction::OnBreakpointHit(ThreadState* thread_state, IntCode* i) {
 
 #undef TRACE_SOURCE_OFFSET
 
-int IVMFunction::CallImpl(ThreadState* thread_state) {
+int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) {
   // Setup register file on stack.
   auto stack = (IVMStack*)thread_state->backend_data();
   auto register_file = (Register*)stack->Alloc(register_count_);
+  auto local_stack = (uint8_t*)alloca(stack_size_);
 
   Memory* memory = thread_state->memory();
 
   IntCodeState ics;
   ics.rf = register_file;
+  ics.locals = local_stack;
   ics.context = (uint8_t*)thread_state->raw_context();
   ics.membase = memory->membase();
   ics.reserve_address = memory->reserve_address();
   ics.page_table = ics.membase + memory->page_table();
   ics.did_carry = 0;
   ics.did_saturate = 0;
   ics.access_callbacks = thread_state->runtime()->access_callbacks();
   ics.thread_state = thread_state;
+  ics.return_address = return_address;
+  ics.call_return_address = 0;
 
   volatile int* suspend_flag_address = thread_state->suspend_flag_address();
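Taken together, these CallImpl changes give each interpreted frame a dedicated byte buffer for HIR locals: IVMAssembler computes the frame size at assembly time, CallImpl alloca()s that many bytes per call, and the LOAD_LOCAL/STORE_LOCAL intcodes (added later in this diff) index into it with byte offsets baked into each instruction. A worked example under those assumptions:

    // A function with one i32 local and one v128 local:
    //   i32  -> XEALIGN(0, 4)  = offset 0,  next offset 4
    //   v128 -> XEALIGN(4, 16) = offset 16, next offset 32
    // stack_size_ = XEALIGN(32, 16) = 32 bytes, and reading the v128 local
    // becomes:
    //   *(vec128_t*)(ics.locals + 16)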
@@ -21,7 +21,7 @@ namespace backend {
 namespace ivm {
 
 
-class IVMFunction : public runtime::GuestFunction {
+class IVMFunction : public runtime::Function {
 public:
   IVMFunction(runtime::FunctionInfo* symbol_info);
   virtual ~IVMFunction();
@@ -31,16 +31,18 @@ public:
 protected:
   virtual int AddBreakpointImpl(runtime::Breakpoint* breakpoint);
   virtual int RemoveBreakpointImpl(runtime::Breakpoint* breakpoint);
-  virtual int CallImpl(runtime::ThreadState* thread_state);
+  virtual int CallImpl(runtime::ThreadState* thread_state,
+                       uint64_t return_address);
 
 private:
   IntCode* GetIntCodeAtSourceOffset(uint64_t offset);
   void OnBreakpointHit(runtime::ThreadState* thread_state, IntCode* i);
 
 private:
-  size_t register_count_;
-  size_t intcode_count_;
-  IntCode* intcodes_;
+  size_t register_count_;
+  size_t stack_size_;
+  size_t intcode_count_;
+  IntCode* intcodes_;
   size_t source_map_count_;
   SourceMapEntry* source_map_;
 };
@@ -196,213 +196,6 @@ int DispatchToC(TranslationContext& ctx, Instr* i, IntCodeFn fn) {
   return 0;
 }
 
-uint32_t IntCode_LOAD_REGISTER_I8(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i8 = (int8_t)cbs->read(cbs->context, address);
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I16(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i16 = XESWAP16((int16_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I32(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i32 = XESWAP32((int32_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I64(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src2_reg | ((uint64_t)i->src3_reg << 32));
-  ics.rf[i->dest_reg].i64 = XESWAP64((int64_t)cbs->read(cbs->context, address));
-  return IA_NEXT;
-}
-int DispatchRegisterRead(
-    TranslationContext& ctx, Instr* i, RegisterAccessCallbacks* cbs) {
-  static IntCodeFn fns[] = {
-    IntCode_LOAD_REGISTER_I8,
-    IntCode_LOAD_REGISTER_I16,
-    IntCode_LOAD_REGISTER_I32,
-    IntCode_LOAD_REGISTER_I64,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-  };
-  IntCodeFn fn = fns[i->dest->type];
-  XEASSERT(fn != IntCode_INVALID_TYPE);
-  uint32_t dest_reg = AllocDynamicRegister(ctx, i->dest);
-  uint32_t src1_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src1);
-  ctx.intcode_count++;
-  IntCode* ic = ctx.intcode_arena->Alloc<IntCode>();
-  ic->intcode_fn = fn;
-  ic->flags = i->flags;
-  ic->debug_flags = 0;
-  ic->dest_reg = dest_reg;
-  ic->src1_reg = src1_reg;
-  ic->src2_reg = (uint32_t)((uint64_t)cbs);
-  ic->src3_reg = (uint32_t)(((uint64_t)cbs) >> 32);
-  return 0;
-}
-uint32_t IntCode_LOAD_REGISTER_I8_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i8 = (int8_t)cbs->read(cbs->context, address);
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I16_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i16 = XESWAP16((int16_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I32_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i32 = XESWAP32((int32_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_LOAD_REGISTER_I64_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      ics.rf[i->dest_reg].i64 = XESWAP64((int64_t)cbs->read(cbs->context, address));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-
-uint32_t IntCode_STORE_REGISTER_I8(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, ics.rf[i->src2_reg].i8);
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I16(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP16(ics.rf[i->src2_reg].i16));
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I32(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP32(ics.rf[i->src2_reg].i32));
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I64(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = (RegisterAccessCallbacks*)
-      (i->src3_reg | ((uint64_t)i->dest_reg << 32));
-  cbs->write(cbs->context, address, XESWAP64(ics.rf[i->src2_reg].i64));
-  return IA_NEXT;
-}
-int DispatchRegisterWrite(
-    TranslationContext& ctx, Instr* i, RegisterAccessCallbacks* cbs) {
-  static IntCodeFn fns[] = {
-    IntCode_STORE_REGISTER_I8,
-    IntCode_STORE_REGISTER_I16,
-    IntCode_STORE_REGISTER_I32,
-    IntCode_STORE_REGISTER_I64,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-    IntCode_INVALID_TYPE,
-  };
-  IntCodeFn fn = fns[i->src2.value->type];
-  XEASSERT(fn != IntCode_INVALID_TYPE);
-  uint32_t src1_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src1);
-  uint32_t src2_reg = AllocOpRegister(ctx, OPCODE_SIG_TYPE_V, &i->src2);
-  ctx.intcode_count++;
-  IntCode* ic = ctx.intcode_arena->Alloc<IntCode>();
-  ic->intcode_fn = fn;
-  ic->flags = i->flags;
-  ic->debug_flags = 0;
-  ic->dest_reg = (uint32_t)(((uint64_t)cbs) >> 32);
-  ic->src1_reg = src1_reg;
-  ic->src2_reg = src2_reg;
-  ic->src3_reg = (uint32_t)((uint64_t)cbs);
-  return 0;
-}
-uint32_t IntCode_STORE_REGISTER_I8_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, ics.rf[i->src2_reg].i8);
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I16_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP16(ics.rf[i->src2_reg].i16));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I32_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP32(ics.rf[i->src2_reg].i32));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-uint32_t IntCode_STORE_REGISTER_I64_DYNAMIC(IntCodeState& ics, const IntCode* i) {
-  uint64_t address = ics.rf[i->src1_reg].u32;
-  RegisterAccessCallbacks* cbs = ics.access_callbacks;
-  while (cbs) {
-    if (cbs->handles(cbs->context, address)) {
-      cbs->write(cbs->context, address, XESWAP64(ics.rf[i->src2_reg].i64));
-      return IA_NEXT;
-    }
-    cbs = cbs->next;
-  }
-  return IA_NEXT;
-}
-
-
 uint32_t IntCode_INVALID(IntCodeState& ics, const IntCode* i) {
   XEASSERTALWAYS();
   return IA_NEXT;
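With DispatchRegisterRead/DispatchRegisterWrite and the per-size LOAD/STORE_REGISTER intcodes removed above, MMIO-style register access is no longer special-cased at translation time; the LOAD_/STORE_ handlers later in this diff instead funnel such addresses through the Memory object at runtime. The shape of the replacement path, taken from the hunks below:

    // Inside each sized load handler:
    if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
      ics.rf[i->dest_reg].i8 = ics.thread_state->memory()->LoadI8(address);
      return IA_NEXT;
    }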
@@ -417,7 +210,7 @@ int TranslateInvalid(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_COMMENT(IntCodeState& ics, const IntCode* i) {
   char* value = (char*)(i->src1_reg | ((uint64_t)i->src2_reg << 32));
-  IPRINT("XE[t] :%d: %s\n", ics.thread_state->GetThreadID(), value);
+  IPRINT("XE[t] :%d: %s\n", ics.thread_state->thread_id(), value);
   IFLUSH();
   return IA_NEXT;
 }
@@ -576,11 +369,15 @@ int Translate_TRAP_TRUE(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_CALL_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   FunctionInfo* symbol_info = (FunctionInfo*)ics.rf[reg].u64;
-  Function* fn = NULL;
-  ics.thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
+  Function* fn = symbol_info->function();
+  if (!fn) {
+    ics.thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
+  }
   XEASSERTNOTNULL(fn);
   // TODO(benvanik): proper tail call support, somehow.
-  fn->Call(ics.thread_state);
+  uint64_t return_address =
+      (i->flags & CALL_TAIL) ? ics.return_address : ics.call_return_address;
+  fn->Call(ics.thread_state, return_address);
   if (i->flags & CALL_TAIL) {
     return IA_RETURN;
   }
@@ -645,12 +442,21 @@ int Translate_CALL_TRUE(TranslationContext& ctx, Instr* i) {
 uint32_t IntCode_CALL_INDIRECT_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   uint64_t target = ics.rf[reg].u32;
 
+  // Check if return address - if so, return.
+  if (i->flags & CALL_POSSIBLE_RETURN) {
+    if (target == ics.return_address) {
+      return IA_RETURN;
+    }
+  }
+
+  // Real call.
   Function* fn = NULL;
   ics.thread_state->runtime()->ResolveFunction(target, &fn);
   XEASSERTNOTNULL(fn);
   // TODO(benvanik): proper tail call support, somehow.
-  fn->Call(ics.thread_state);
+  uint64_t return_address =
+      (i->flags & CALL_TAIL) ? ics.return_address : ics.call_return_address;
+  fn->Call(ics.thread_state, return_address);
   if (i->flags & CALL_TAIL) {
     return IA_RETURN;
   }
@@ -712,6 +518,13 @@ int Translate_CALL_INDIRECT_TRUE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->src1.value->type]);
 }
 
+uint32_t IntCode_CALL_EXTERN(IntCodeState& ics, const IntCode* i) {
+  return IntCode_CALL_XX(ics, i, i->src1_reg);
+}
+int Translate_CALL_EXTERN(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_CALL_EXTERN);
+}
+
 uint32_t IntCode_RETURN(IntCodeState& ics, const IntCode* i) {
   return IA_RETURN;
 }
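CALL, CALL_INDIRECT, and the new CALL_EXTERN all share the same return-address selection: a tail call propagates the frame's incoming return_address, while a normal call uses whatever SET_RETURN_ADDRESS recorded just before the call. Factored out, the rule these handlers apply is simply:

    // Sketch of the selection rule used by IntCode_CALL_XX and
    // IntCode_CALL_INDIRECT_XX above:
    uint64_t SelectReturnAddress(uint32_t flags, const IntCodeState& ics) {
      return (flags & CALL_TAIL) ? ics.return_address
                                 : ics.call_return_address;
    }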
@@ -768,6 +581,14 @@ int Translate_RETURN_TRUE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->src1.value->type]);
 }
 
+uint32_t IntCode_SET_RETURN_ADDRESS(IntCodeState& ics, const IntCode* i) {
+  ics.call_return_address = ics.rf[i->src1_reg].u32;
+  return IA_NEXT;
+}
+int Translate_SET_RETURN_ADDRESS(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_SET_RETURN_ADDRESS);
+}
+
 uint32_t IntCode_BRANCH_XX(IntCodeState& ics, const IntCode* i, uint32_t reg) {
   return ics.rf[reg].u32;
 }
@@ -1335,34 +1156,116 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_LOAD_CLOCK);
 }
 
+uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32));
+  return IA_NEXT;
+}
+int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn fns[] = {
+    IntCode_LOAD_LOCAL_I8,
+    IntCode_LOAD_LOCAL_I16,
+    IntCode_LOAD_LOCAL_I32,
+    IntCode_LOAD_LOCAL_I64,
+    IntCode_LOAD_LOCAL_F32,
+    IntCode_LOAD_LOCAL_F64,
+    IntCode_LOAD_LOCAL_V128,
+  };
+  return DispatchToC(ctx, i, fns[i->dest->type]);
+}
+
+uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) {
+  *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i8;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) {
+  *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i16;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) {
+  *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i32;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) {
+  *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i64;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) {
+  *((float*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f32;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) {
+  *((double*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f64;
+  return IA_NEXT;
+}
+uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) {
+  *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].v128;
+  return IA_NEXT;
+}
+int Translate_STORE_LOCAL(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn fns[] = {
+    IntCode_STORE_LOCAL_I8,
+    IntCode_STORE_LOCAL_I16,
+    IntCode_STORE_LOCAL_I32,
+    IntCode_STORE_LOCAL_I64,
+    IntCode_STORE_LOCAL_F32,
+    IntCode_STORE_LOCAL_F64,
+    IntCode_STORE_LOCAL_V128,
+  };
+  return DispatchToC(ctx, i, fns[i->src2.value->type]);
+}
+
 uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I16(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%d (%.X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
+  DPRINT("%d (%X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_I64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%lld (%.llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
+  DPRINT("%lld (%llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_F32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f32 = *((float*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%e (%.X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
+  DPRINT("%e (%X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_F64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f64 = *((double*)(ics.context + ics.rf[i->src1_reg].u64));
-  DPRINT("%lle (%.llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
+  DPRINT("%lle (%llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_LOAD_CONTEXT_V128(IntCodeState& ics, const IntCode* i) {
@@ -1388,39 +1291,39 @@ int Translate_LOAD_CONTEXT(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_STORE_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
   *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8;
-  DPRINT("ctx i8 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
+  DPRINT("ctx i8 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I16(IntCodeState& ics, const IntCode* i) {
   *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16;
-  DPRINT("ctx i16 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
+  DPRINT("ctx i16 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I32(IntCodeState& ics, const IntCode* i) {
   *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32;
-  DPRINT("ctx i32 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
+  DPRINT("ctx i32 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_I64(IntCodeState& ics, const IntCode* i) {
   *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64;
-  DPRINT("ctx i64 +%d = %lld (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
+  DPRINT("ctx i64 +%d = %lld (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_F32(IntCodeState& ics, const IntCode* i) {
   *((float*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32;
-  DPRINT("ctx f32 +%d = %e (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
+  DPRINT("ctx f32 +%d = %e (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_F64(IntCodeState& ics, const IntCode* i) {
   *((double*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64;
-  DPRINT("ctx f64 +%d = %lle (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
+  DPRINT("ctx f64 +%d = %lle (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_CONTEXT_V128(IntCodeState& ics, const IntCode* i) {
   *((vec128_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].v128;
   DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64,
       VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
-      VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));
+      VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3));
   return IA_NEXT;
 }
 int Translate_STORE_CONTEXT(TranslationContext& ctx, Instr* i) {
@@ -1439,7 +1342,8 @@
 uint32_t IntCode_LOAD_I8(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I8_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i8 = ics.thread_state->memory()->LoadI8(address);
+    return IA_NEXT;
   }
   DPRINT("%d (%X) = load.i8 %.8X\n",
       *((int8_t*)(ics.membase + address)),
@@ -1452,7 +1356,9 @@ uint32_t IntCode_LOAD_I8(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I16(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I16_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i16 =
+        XESWAP16(ics.thread_state->memory()->LoadI16(address));
+    return IA_NEXT;
   }
   DPRINT("%d (%X) = load.i16 %.8X\n",
       *((int16_t*)(ics.membase + address)),
@@ -1465,7 +1371,9 @@ uint32_t IntCode_LOAD_I16(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I32_DYNAMIC(ics, i);
+    ics.rf[i->dest_reg].i32 =
+        XESWAP32(ics.thread_state->memory()->LoadI32(address));
+    return IA_NEXT;
   }
   DFLUSH();
   DPRINT("%d (%X) = load.i32 %.8X\n",
@@ -1479,7 +1387,9 @@ uint32_t IntCode_LOAD_I32(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_LOAD_I64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_LOAD_REGISTER_I64(ics, i);
+    ics.rf[i->dest_reg].i64 =
+        XESWAP64(ics.thread_state->memory()->LoadI64(address));
+    return IA_NEXT;
   }
   DPRINT("%lld (%llX) = load.i64 %.8X\n",
       *((int64_t*)(ics.membase + address)),
@@ -1515,7 +1425,7 @@ uint32_t IntCode_LOAD_V128(IntCodeState& ics, const IntCode* i) {
   for (int n = 0; n < 4; n++) {
     VECI4(dest,n) = *((uint32_t*)(ics.membase + address + n * 4));
   }
-  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load v128 %.8X\n",
+  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
       VECF4(dest,0), VECF4(dest,1), VECF4(dest,2), VECF4(dest,3),
       VECI4(dest,0), VECI4(dest,1), VECI4(dest,2), VECI4(dest,3),
       address);
@@ -1532,90 +1442,95 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
     IntCode_LOAD_F64,
     IntCode_LOAD_V128,
   };
-  if (i->src1.value->IsConstant()) {
-    // Constant address - check register access callbacks.
-    // NOTE: we still will likely want to check on access in debug mode, as
-    // constant propagation may not have happened.
-    uint64_t address = i->src1.value->AsUint64();
-    RegisterAccessCallbacks* cbs = ctx.access_callbacks;
-    while (cbs) {
-      if (cbs->handles(cbs->context, address)) {
-        return DispatchRegisterRead(ctx, i, cbs);
-      }
-      cbs = cbs->next;
-    }
-  }
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
 void MarkPageDirty(IntCodeState& ics, uint32_t address) {
   // 16KB pages.
   ics.page_table[(address >> 14) & 0x7FFF] = 1;
 }
 uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I8_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI8(address, ics.rf[i->src2_reg].i8);
+    return IA_NEXT;
   }
   DPRINT("store.i8 %.8X = %d (%X)\n",
-      address, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].i8);
+      address, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8);
   DFLUSH();
   *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I16_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI16(address,
+        XESWAP16(ics.rf[i->src2_reg].i16));
+    return IA_NEXT;
   }
   DPRINT("store.i16 %.8X = %d (%X)\n",
-      address, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].i16);
+      address, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16);
   DFLUSH();
   *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I32_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI32(address,
+        XESWAP32(ics.rf[i->src2_reg].i32));
+    return IA_NEXT;
   }
   DPRINT("store.i32 %.8X = %d (%X)\n",
-      address, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].i32);
+      address, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32);
   DFLUSH();
   *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
-    return IntCode_STORE_REGISTER_I64_DYNAMIC(ics, i);
+    ics.thread_state->memory()->StoreI64(address,
+        XESWAP64(ics.rf[i->src2_reg].i64));
+    return IA_NEXT;
   }
   DPRINT("store.i64 %.8X = %lld (%llX)\n",
-      address, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].i64);
+      address, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64);
   DFLUSH();
   *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   DPRINT("store.f32 %.8X = %e (%X)\n",
-      address, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].i32);
+      address, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32);
   DFLUSH();
   *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   DPRINT("store.f64 %.8X = %lle (%llX)\n",
-      address, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].i64);
+      address, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64);
   DFLUSH();
   *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
-  DPRINT("store v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
+  DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
       address,
       VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
      VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3));
   DFLUSH();
   *((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128;
   MarkPageDirty(ics, address);
   return IA_NEXT;
 }
 int Translate_STORE(TranslationContext& ctx, Instr* i) {
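A quick sanity check on the MarkPageDirty arithmetic above: with 16KB pages the page index is address >> 14, and the & 0x7FFF mask keeps 0x8000 (32768) entries, so the table covers 32768 x 16KB = 512MB of guest address space, which matches the Xbox 360's physical memory size.

    static_assert((1u << 14) == 16 * 1024, "16KB pages");
    // (0x7FFF + 1) pages * 16KB per page = 512MB tracked.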
@@ -1628,19 +1543,6 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) {
     IntCode_STORE_F64,
     IntCode_STORE_V128,
   };
-  if (i->src1.value->IsConstant()) {
-    // Constant address - check register access callbacks.
-    // NOTE: we still will likely want to check on access in debug mode, as
-    // constant propagation may not have happened.
-    uint64_t address = i->src1.value->AsUint64();
-    RegisterAccessCallbacks* cbs = ctx.access_callbacks;
-    while (cbs) {
-      if (cbs->handles(cbs->context, address)) {
-        return DispatchRegisterWrite(ctx, i, cbs);
-      }
-      cbs = cbs->next;
-    }
-  }
   return DispatchToC(ctx, i, fns[i->src2.value->type]);
 }
 
@@ -2093,19 +1995,19 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_DID_SATURATE);
 }
 
-#define VECTOR_COMPARER(type, value, count, op) \
+#define VECTOR_COMPARER(type, value, dest_value, count, op) \
   const vec128_t& src1 = ics.rf[i->src1_reg].v128; \
   const vec128_t& src2 = ics.rf[i->src2_reg].v128; \
   vec128_t& dest = ics.rf[i->dest_reg].v128; \
   for (int n = 0; n < count; n++) { \
-    dest.value[n] = (type)src1.value[n] op (type)src2.value[n]; \
+    dest.dest_value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? 0xFFFFFFFF : 0; \
  } \
  return IA_NEXT;
 
-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, ==) };
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_EQ_I8,
@@ -2119,10 +2021,10 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGT_I8,
@@ -2136,10 +2038,10 @@ int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGE_I8,
@@ -2153,10 +2055,10 @@ int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGT_I8,
@@ -2170,10 +2072,10 @@ int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_UGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGE_I8,
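The VECTOR_COMPARER change switches the compare intcodes from storing a 0/1 boolean per lane to storing an all-ones/all-zeros lane mask, matching AltiVec/SSE compare semantics (vcmpequw and friends yield 0xFFFFFFFF per true lane). The extra dest_value parameter lets float compares read f4 lanes but write integer i4 masks. Expanded for the F32 equality case, the macro now behaves like:

    // Per-lane mask result (truncated to the lane width for narrow types):
    for (int n = 0; n < 4; n++) {
      dest.i4[n] = ((float)src1.f4[n] == (float)src2.f4[n]) ? 0xFFFFFFFF : 0;
    }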
@@ -2466,9 +2368,9 @@ uint32_t IntCode_SUB_I16_I16(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_SUB_I32_I32(IntCodeState& ics, const IntCode* i) {
   int32_t a = ics.rf[i->src1_reg].i32; int32_t b = ics.rf[i->src2_reg].i32;
   if (i->flags == ARITHMETIC_SET_CARRY) {
-    ics.did_carry = a < ~b;
+    ics.did_carry = SUB_DID_CARRY(a, b);
   }
   ics.rf[i->dest_reg].i32 = a - b;
   return IA_NEXT;
 }
 uint32_t IntCode_SUB_I64_I64(IntCodeState& ics, const IntCode* i) {
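The old test a < ~b was a signed comparison and did not implement PowerPC-style subtract carry. For a - b, the carry flag is the carry-out of a + ~b + 1, which reduces to an unsigned a >= b. A minimal stand-in for the macro (an assumption - SUB_DID_CARRY's actual body is defined elsewhere in this file and not shown here):

    #define SUB_DID_CARRY(a, b) ((uint32_t)(a) >= (uint32_t)(b))

For example, 5 - 3 carries (5 >= 3 unsigned) while 3 - 5 borrows (3 < 5 unsigned).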
@@ -3605,17 +3507,17 @@ int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
-  ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i64);
+  ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i8);
   return IA_NEXT;
 }
 int Translate_EXTRACT(TranslationContext& ctx, Instr* i) {
@@ -3817,6 +3719,7 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
+  dest.ix = dest.iy = 0;
   dest.iz =
       ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
       DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
@@ -4009,8 +3912,10 @@ static const TranslateFn dispatch_table[] = {
   Translate_CALL_TRUE,
   Translate_CALL_INDIRECT,
   Translate_CALL_INDIRECT_TRUE,
+  Translate_CALL_EXTERN,
   Translate_RETURN,
   Translate_RETURN_TRUE,
+  Translate_SET_RETURN_ADDRESS,
 
   Translate_BRANCH,
   Translate_BRANCH_TRUE,
@@ -4031,6 +3936,9 @@ static const TranslateFn dispatch_table[] = {
 
   Translate_LOAD_CLOCK,
 
+  Translate_LOAD_LOCAL,
+  Translate_STORE_LOCAL,
+
   Translate_LOAD_CONTEXT,
   Translate_STORE_CONTEXT,
 
@@ -14,7 +14,6 @@
 
 #include <alloy/hir/instr.h>
 #include <alloy/hir/opcodes.h>
 #include <alloy/runtime/register_access.h>
-
 
 namespace alloy { namespace runtime { class ThreadState; } }
 
@@ -41,13 +40,15 @@ typedef union {
 
 typedef struct {
   Register* rf;
+  uint8_t* locals;
   uint8_t* context;
   uint8_t* membase;
   uint32_t* reserve_address;
   uint8_t* page_table;
   int8_t did_carry;
   int8_t did_saturate;
   runtime::RegisterAccessCallbacks* access_callbacks;
   runtime::ThreadState* thread_state;
+  uint64_t return_address;
+  uint64_t call_return_address;
 } IntCodeState;
 
@@ -95,8 +96,6 @@ typedef struct SourceMapEntry_s {
 
 typedef struct {
-  runtime::RegisterAccessCallbacks* access_callbacks;
-
   uint32_t register_count;
   size_t intcode_count;
   Arena* intcode_arena;
@@ -104,6 +103,7 @@ typedef struct {
   Arena* source_map_arena;
   Arena* scratch_arena;
   LabelRef* label_ref_head;
+  size_t stack_size;
 } TranslationContext;
@@ -32,17 +32,17 @@ public:
     ALLOY_BACKEND_IVM_ASSEMBLER_DEINIT = ALLOY_BACKEND_IVM_ASSEMBLER | (2),
   };
 
-  typedef struct {
+  typedef struct Init_s {
     static const uint32_t event_type = ALLOY_BACKEND_IVM_INIT;
   } Init;
-  typedef struct {
+  typedef struct Deinit_s {
     static const uint32_t event_type = ALLOY_BACKEND_IVM_DEINIT;
   } Deinit;
 
-  typedef struct {
+  typedef struct AssemblerInit_s {
     static const uint32_t event_type = ALLOY_BACKEND_IVM_ASSEMBLER_INIT;
   } AssemblerInit;
-  typedef struct {
+  typedef struct AssemblerDeinit_s {
     static const uint32_t event_type = ALLOY_BACKEND_IVM_ASSEMBLER_DEINIT;
   } AssemblerDeinit;
 };
@@ -0,0 +1,39 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef ALLOY_BACKEND_MACHINE_INFO_H_
+#define ALLOY_BACKEND_MACHINE_INFO_H_
+
+#include <alloy/core.h>
+
+
+namespace alloy {
+namespace backend {
+
+
+struct MachineInfo {
+  struct RegisterSet {
+    enum Types {
+      INT_TYPES = (1 << 1),
+      FLOAT_TYPES = (1 << 2),
+      VEC_TYPES = (1 << 3),
+    };
+    uint8_t id;
+    char name[4];
+    uint32_t types;
+    uint32_t count;
+  } register_sets[8];
+};
+
+
+}  // namespace backend
+}  // namespace alloy
+
+
+#endif  // ALLOY_BACKEND_MACHINE_INFO_H_
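The new MachineInfo gives frontends a backend-agnostic view of register resources (the IVM backend advertises 16 "gpr" and 16 "vec" registers in an earlier hunk). Since Backend's constructor zero-fills machine_info_, a consumer can presumably walk register_sets until it hits a zeroed tail entry; a sketch of that assumed convention:

    const MachineInfo* info = backend->machine_info();
    for (size_t n = 0; n < XECOUNT(info->register_sets); n++) {
      const auto& set = info->register_sets[n];
      if (!set.types) break;  // zero-filled tail entry
      printf("set %u '%s': %u registers\n", set.id, set.name, set.count);
    }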
@@ -5,6 +5,7 @@
     'assembler.h',
     'backend.cc',
     'backend.h',
+    'machine_info.h',
     'tracing.h',
   ],
File diff suppressed because it is too large
@@ -1,71 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <alloy/backend/x64/lowering/lowering_table.h>
-
-#include <alloy/backend/x64/x64_emitter.h>
-#include <alloy/backend/x64/lowering/lowering_sequences.h>
-
-using namespace alloy;
-using namespace alloy::backend::x64;
-using namespace alloy::backend::x64::lowering;
-
-
-LoweringTable::LoweringTable(X64Backend* backend) :
-    backend_(backend) {
-  xe_zero_struct(lookup_, sizeof(lookup_));
-}
-
-LoweringTable::~LoweringTable() {
-  for (size_t n = 0; n < XECOUNT(lookup_); n++) {
-    auto entry = lookup_[n];
-    while (entry) {
-      auto next = entry->next;
-      delete entry;
-      entry = next;
-    }
-  }
-}
-
-int LoweringTable::Initialize() {
-  RegisterSequences(this);
-  return 0;
-}
-
-void LoweringTable::AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn) {
-  auto existing_entry = lookup_[starting_opcode];
-  auto new_entry = new sequence_fn_entry_t();
-  new_entry->fn = fn;
-  new_entry->next = existing_entry;
-  lookup_[starting_opcode] = new_entry;
-}
-
-int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) {
-  // Process instructions.
-  auto instr = block->instr_head;
-  while (instr) {
-    bool processed = false;
-    auto entry = lookup_[instr->opcode->num];
-    while (entry) {
-      if ((*entry->fn)(e, instr)) {
-        processed = true;
-        break;
-      }
-      entry = entry->next;
-    }
-    if (!processed) {
-      // No sequence found!
-      XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
-      return 1;
-    }
-    instr = e.Advance(instr);
-  }
-
-  return 0;
-}
@@ -1,58 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
-#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
-
-#include <alloy/core.h>
-#include <alloy/hir/hir_builder.h>
-
-
-namespace alloy {
-namespace backend {
-namespace x64 {
-class X64Backend;
-class X64Emitter;
-namespace lowering {
-
-
-class LoweringTable {
-public:
-  LoweringTable(X64Backend* backend);
-  ~LoweringTable();
-
-  int Initialize();
-
-  int ProcessBlock(X64Emitter& e, hir::Block* block);
-
-public:
-  typedef bool(*sequence_fn_t)(X64Emitter& e, hir::Instr*& instr);
-  void AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn);
-
-private:
-  class sequence_fn_entry_t {
-  public:
-    sequence_fn_t fn;
-    sequence_fn_entry_t* next;
-  };
-
-  // NOTE: this class is shared by multiple threads and is not thread safe.
-  // Do not modify anything after init.
-  X64Backend* backend_;
-  sequence_fn_entry_t* lookup_[hir::__OPCODE_MAX_VALUE];
-};
-
-
-}  // namespace lowering
-}  // namespace x64
-}  // namespace backend
-}  // namespace alloy
-
-
-#endif  // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_
@@ -1,9 +0,0 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
  'sources': [
    'lowering_sequences.cc',
    'lowering_sequences.h',
    'lowering_table.cc',
    'lowering_table.h',
  ],
}

@@ -12,9 +12,12 @@
    'x64_emitter.h',
    'x64_function.cc',
    'x64_function.h',
  ],

  'includes': [
    'lowering/sources.gypi',
    'x64_sequence.inl',
    'x64_sequences.cc',
    'x64_sequences.h',
    'x64_thunk_emitter.cc',
    'x64_thunk_emitter.h',
    'x64_tracers.cc',
    'x64_tracers.h',
  ],
}

@@ -32,17 +32,17 @@ public:
    ALLOY_BACKEND_X64_ASSEMBLER_DEINIT = ALLOY_BACKEND_X64_ASSEMBLER | (2),
  };

  typedef struct {
  typedef struct Init_s {
    static const uint32_t event_type = ALLOY_BACKEND_X64_INIT;
  } Init;
  typedef struct {
  typedef struct Deinit_s {
    static const uint32_t event_type = ALLOY_BACKEND_X64_DEINIT;
  } Deinit;

  typedef struct {
  typedef struct AssemblerInit_s {
    static const uint32_t event_type = ALLOY_BACKEND_X64_ASSEMBLER_INIT;
  } AssemblerInit;
  typedef struct {
  typedef struct AssemblerDeinit_s {
    static const uint32_t event_type = ALLOY_BACKEND_X64_ASSEMBLER_DEINIT;
  } AssemblerDeinit;
};

@@ -30,7 +30,7 @@ using namespace alloy::runtime;

X64Assembler::X64Assembler(X64Backend* backend) :
    x64_backend_(backend),
    emitter_(0),
    emitter_(0), allocator_(0),
    Assembler(backend) {
}

@@ -39,6 +39,7 @@ X64Assembler::~X64Assembler() {
  }));

  delete emitter_;
  delete allocator_;
}

int X64Assembler::Initialize() {

@@ -47,8 +48,8 @@ int X64Assembler::Initialize() {
    return result;
  }

  emitter_ = new X64Emitter(x64_backend_,
                            new XbyakAllocator());
  allocator_ = new XbyakAllocator();
  emitter_ = new X64Emitter(x64_backend_, allocator_);

  alloy::tracing::WriteEvent(EventType::AssemblerInit({
  }));

@@ -65,6 +66,8 @@ int X64Assembler::Assemble(
    FunctionInfo* symbol_info, HIRBuilder* builder,
    uint32_t debug_info_flags, DebugInfo* debug_info,
    Function** out_function) {
  SCOPE_profile_cpu_f("alloy");

  int result = 0;

  // Lower HIR -> x64.

@@ -82,13 +85,15 @@ int X64Assembler::Assemble(
    string_buffer_.Reset();
  }

  X64Function* fn = new X64Function(symbol_info);
  fn->set_debug_info(debug_info);
  fn->Setup(machine_code, code_size);
  {
    X64Function* fn = new X64Function(symbol_info);
    fn->set_debug_info(debug_info);
    fn->Setup(machine_code, code_size);

  *out_function = fn;
    *out_function = fn;

  result = 0;
    result = 0;
  }

XECLEANUP:
  Reset();

|
@@ -21,6 +21,7 @@ namespace x64 {

class X64Backend;
class X64Emitter;
class XbyakAllocator;


class X64Assembler : public Assembler {

@@ -45,6 +46,7 @@ private:
private:
  X64Backend* x64_backend_;
  X64Emitter* emitter_;
  XbyakAllocator* allocator_;

  StringBuffer string_buffer_;
};

@@ -12,25 +12,23 @@
#include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_assembler.h>
#include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/lowering/lowering_sequences.h>
#include <alloy/backend/x64/x64_sequences.h>
#include <alloy/backend/x64/x64_thunk_emitter.h>

using namespace alloy;
using namespace alloy::backend;
using namespace alloy::backend::x64;
using namespace alloy::backend::x64::lowering;
using namespace alloy::runtime;


X64Backend::X64Backend(Runtime* runtime) :
    code_cache_(0), lowering_table_(0),
    code_cache_(0),
    Backend(runtime) {
}

X64Backend::~X64Backend() {
  alloy::tracing::WriteEvent(EventType::Deinit({
  }));
  delete lowering_table_;
  delete code_cache_;
}

@@ -40,14 +38,34 @@ int X64Backend::Initialize() {
    return result;
  }

  RegisterSequences();

  machine_info_.register_sets[0] = {
    0,
    "gpr",
    MachineInfo::RegisterSet::INT_TYPES,
    X64Emitter::GPR_COUNT,
  };
  machine_info_.register_sets[1] = {
    1,
    "xmm",
    MachineInfo::RegisterSet::FLOAT_TYPES |
    MachineInfo::RegisterSet::VEC_TYPES,
    X64Emitter::XMM_COUNT,
  };

  code_cache_ = new X64CodeCache();
  result = code_cache_->Initialize();
  if (result) {
    return result;
  }

  lowering_table_ = new LoweringTable(this);
  RegisterSequences(lowering_table_);
  auto allocator = new XbyakAllocator();
  auto thunk_emitter = new X64ThunkEmitter(this, allocator);
  host_to_guest_thunk_ = thunk_emitter->EmitHostToGuestThunk();
  guest_to_host_thunk_ = thunk_emitter->EmitGuestToHostThunk();
  delete thunk_emitter;
  delete allocator;

  alloy::tracing::WriteEvent(EventType::Init({
  }));

@@ -20,19 +20,22 @@ namespace backend {
namespace x64 {

class X64CodeCache;
namespace lowering { class LoweringTable; }


#define ALLOY_HAS_X64_BACKEND 1


typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);

class X64Backend : public Backend {
public:
  X64Backend(runtime::Runtime* runtime);
  virtual ~X64Backend();

  X64CodeCache* code_cache() const { return code_cache_; }
  lowering::LoweringTable* lowering_table() const { return lowering_table_; }
  HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
  GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }

  virtual int Initialize();

@@ -40,7 +43,8 @@ public:

private:
  X64CodeCache* code_cache_;
  lowering::LoweringTable* lowering_table_;
  HostToGuestThunk host_to_guest_thunk_;
  GuestToHostThunk guest_to_host_thunk_;
};

@@ -34,14 +34,14 @@ public:
  const static uint32_t ESTIMATED_FN_SIZE = 512;
  // Size of unwind info per function.
  // TODO(benvanik): move this to emitter.
  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1);
  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2);

  void* fn_table_handle;
  RUNTIME_FUNCTION* fn_table;
  uint32_t fn_table_count;
  uint32_t fn_table_capacity;

  void AddTableEntry(uint8_t* code, size_t code_size);
  void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size);
};

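// Illustrative aside (not part of the commit): UNWIND_INFO is a fixed 4-byte
// header followed by 2-byte UNWIND_CODE slots. The old 4 + (2 * 1) reserved a
// single slot, enough only for UWOP_ALLOC_SMALL; the new 4 + (2 * 1 + 2 + 2)
// reserves two more slots so UWOP_ALLOC_LARGE can store its scaled size after
// the opcode (the slot array is also padded to an even count per the format).
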
@@ -73,7 +73,10 @@ int X64CodeCache::Initialize() {
  return 0;
}

void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
                              size_t stack_size) {
  SCOPE_profile_cpu_f("alloy");

  // Add unwind info into the allocation size. Keep things 16b aligned.
  code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);

@@ -101,7 +104,7 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
  active_chunk_->offset += code_size;

  // Add entry to fn table.
  active_chunk_->AddTableEntry(final_address, code_size);
  active_chunk_->AddTableEntry(final_address, code_size, stack_size);

  UnlockMutex(lock_);

@@ -156,6 +159,27 @@ typedef enum _UNWIND_OP_CODES {
  UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
  UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
} UNWIND_CODE_OPS;
class UNWIND_REGISTER {
public:
  enum _ {
    RAX = 0,
    RCX = 1,
    RDX = 2,
    RBX = 3,
    RSP = 4,
    RBP = 5,
    RSI = 6,
    RDI = 7,
    R8 = 8,
    R9 = 9,
    R10 = 10,
    R11 = 11,
    R12 = 12,
    R13 = 13,
    R14 = 14,
    R15 = 15,
  };
};

typedef union _UNWIND_CODE {
  struct {

@@ -183,7 +207,8 @@ typedef struct _UNWIND_INFO {
} UNWIND_INFO, *PUNWIND_INFO;
}  // namespace

void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
                                 size_t stack_size) {
  // NOTE: we assume a chunk lock.

  if (fn_table_count + 1 > fn_table_capacity) {

@@ -213,26 +238,57 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
  size_t unwind_info_offset = offset;
  offset += UNWIND_INFO_SIZE;

  // TODO(benvanik): take as parameters?
  bool has_prolog = true;
  uint8_t prolog_size = 4;
  uint8_t stack_bytes = 64;
  if (!stack_size) {
    uint8_t prolog_size = 0;

  // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
  UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
  unwind_info->Version = 1;
  unwind_info->Flags = 0;
  unwind_info->SizeOfProlog = has_prolog ? prolog_size : 0;
  unwind_info->CountOfCodes = has_prolog ? 1 : 0;
  unwind_info->FrameRegister = 0;
  unwind_info->FrameOffset = 0;
    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
    unwind_info->Version = 1;
    unwind_info->Flags = 0;
    unwind_info->SizeOfProlog = 0;
    unwind_info->CountOfCodes = 0;
    unwind_info->FrameRegister = 0;
    unwind_info->FrameOffset = 0;
  } else if (stack_size <= 128) {
    uint8_t prolog_size = 4;

  // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
  auto& code_0 = unwind_info->UnwindCode[0];
  code_0.CodeOffset = 4; // end of instruction + 1 == offset of next instruction
  code_0.UnwindOp = UWOP_ALLOC_SMALL;
  code_0.OpInfo = stack_bytes / 8 - 1;
  XEASSERT(stack_bytes < 128);
    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
    unwind_info->Version = 1;
    unwind_info->Flags = 0;
    unwind_info->SizeOfProlog = prolog_size;
    unwind_info->CountOfCodes = 1;
    unwind_info->FrameRegister = 0;
    unwind_info->FrameOffset = 0;

    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
    size_t co = 0;
    auto& unwind_code = unwind_info->UnwindCode[co++];
    unwind_code.CodeOffset = 14; // end of instruction + 1 == offset of next instruction
    unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
    unwind_code.OpInfo = stack_size / 8 - 1;
  } else {
    // TODO(benvanik): take as parameters?
    uint8_t prolog_size = 7;

    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
    unwind_info->Version = 1;
    unwind_info->Flags = 0;
    unwind_info->SizeOfProlog = prolog_size;
    unwind_info->CountOfCodes = 3;
    unwind_info->FrameRegister = 0;
    unwind_info->FrameOffset = 0;

    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
    size_t co = 0;
    auto& unwind_code = unwind_info->UnwindCode[co++];
    unwind_code.CodeOffset = 7; // end of instruction + 1 == offset of next instruction
    unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
    unwind_code.OpInfo = 0;
    unwind_code = unwind_info->UnwindCode[co++];
    unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
  }

  // Add entry.
  auto& fn_entry = fn_table[fn_table_count++];

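// Illustrative aside (not part of the commit): a sanity check of the two
// allocation encodings used above, per the documented Win64 UNWIND_CODE
// format. UWOP_ALLOC_SMALL packs sizes of 8..128 bytes as OpInfo = size/8 - 1:
//   stack_size = 64   ->  OpInfo = 64 / 8 - 1 = 7
// UWOP_ALLOC_LARGE with OpInfo == 0 stores size/8 in the following 16-bit
// slot, covering frames up to 512KB - 8:
//   stack_size = 4096 ->  FrameOffset = 4096 / 8 = 512
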
@@ -30,7 +30,7 @@ public:
  // TODO(benvanik): keep track of code blocks
  // TODO(benvanik): padding/guards/etc

  void* PlaceCode(void* machine_code, size_t code_size);
  void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size);

private:
  const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;

@@ -11,9 +11,14 @@

#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/x64_function.h>
#include <alloy/backend/x64/x64_sequences.h>
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/hir/hir_builder.h>
#include <alloy/runtime/debug_info.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/symbol_info.h>
#include <alloy/runtime/thread_state.h>

using namespace alloy;
using namespace alloy::backend;

@@ -30,22 +35,38 @@ namespace x64 {

static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;

static const size_t STASH_OFFSET = 32;

// If we are running with tracing on we have to store the EFLAGS in the stack,
// otherwise our calls out to C to print will clear it before DID_CARRY/etc
// can get the value.
#define STORE_EFLAGS 1

}  // namespace x64
}  // namespace backend
}  // namespace alloy


const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
  Operand::RBX,
  Operand::R12, Operand::R13, Operand::R14, Operand::R15,
};

const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = {
  6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
};


X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
    runtime_(backend->runtime()),
    backend_(backend),
    code_cache_(backend->code_cache()),
    allocator_(allocator),
    current_instr_(0),
    CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
  xe_zero_struct(&reg_state_, sizeof(reg_state_));
}

X64Emitter::~X64Emitter() {
  delete allocator_;
}

int X64Emitter::Initialize() {

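// Illustrative aside (not part of the commit): both maps above hand out Win64
// callee-saved registers only (rbx/r12-r15 and xmm6-xmm15), so allocated
// values survive any call back into host code without extra spills. A lookup
// like SetupReg(v, r) then reduces to:
//   r = Xbyak::Reg64(gpr_reg_map_[v->reg.index]);
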
@@ -53,9 +74,11 @@ int X64Emitter::Initialize() {
}

int X64Emitter::Emit(
    HIRBuilder* builder,
    HIRBuilder* builder,
    uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
    void*& out_code_address, size_t& out_code_size) {
  SCOPE_profile_cpu_f("alloy");

  // Reset.
  if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
    source_map_count_ = 0;

@@ -63,14 +86,15 @@ int X64Emitter::Emit(
  }

  // Fill the generator with code.
  int result = Emit(builder);
  size_t stack_size = 0;
  int result = Emit(builder, stack_size);
  if (result) {
    return result;
  }

  // Copy the final code to the cache and relocate it.
  out_code_size = getSize();
  out_code_address = Emplace(code_cache_);
  out_code_address = Emplace(stack_size);

  // Stash source map.
  if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {

@@ -82,13 +106,13 @@ int X64Emitter::Emit(
  return 0;
}

void* X64Emitter::Emplace(X64CodeCache* code_cache) {
void* X64Emitter::Emplace(size_t stack_size) {
  // To avoid changing xbyak, we do a switcharoo here.
  // top_ points to the Xbyak buffer, and since we are in AutoGrow mode
  // it has pending relocations. We copy the top_ to our buffer, swap the
  // pointer, relocate, then return the original scratch pointer for use.
  uint8_t* old_address = top_;
  void* new_address = code_cache->PlaceCode(top_, size_);
  void* new_address = code_cache_->PlaceCode(top_, size_, stack_size);
  top_ = (uint8_t*)new_address;
  ready();
  top_ = old_address;

@@ -96,17 +120,22 @@ void* X64Emitter::Emplace(X64CodeCache* code_cache) {
  return new_address;
}

int X64Emitter::Emit(HIRBuilder* builder) {
  // These are the registers we will not be using. All others are fair game.
  const uint32_t reserved_regs =
      GetRegBit(rax) |
      GetRegBit(rcx) |
      GetRegBit(rdx) |
      GetRegBit(rsp) |
      GetRegBit(rbp) |
      GetRegBit(rsi) |
      GetRegBit(rdi) |
      GetRegBit(xmm0);
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
  // Calculate stack size. We need to align things to their natural sizes.
  // This could be much better (sort by type/etc).
  auto locals = builder->locals();
  size_t stack_offset = StackLayout::GUEST_STACK_SIZE;
  for (auto it = locals.begin(); it != locals.end(); ++it) {
    auto slot = *it;
    size_t type_size = GetTypeSize(slot->type);
    // Align to natural size.
    stack_offset = XEALIGN(stack_offset, type_size);
    slot->set_constant((uint32_t)stack_offset);
    stack_offset += type_size;
  }
  // Ensure 16b alignment.
  stack_offset -= StackLayout::GUEST_STACK_SIZE;
  stack_offset = XEALIGN(stack_offset, 16);

  // Function prolog.
  // Must be 16b aligned.

@@ -120,20 +149,18 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  // X64CodeCache, which dynamically generates exception information.
  // Adding or changing anything here must be matched!
  const bool emit_prolog = true;
  const size_t stack_size = 64;
  const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
  XEASSERT((stack_size + 8) % 16 == 0);
  out_stack_size = stack_size;
  stack_size_ = stack_size;
  if (emit_prolog) {
    mov(qword[rsp + 16], rdx);
    mov(qword[rsp + 8], rcx);
    sub(rsp, stack_size);
    mov(qword[rsp + 8 * 0], rbx);
    mov(qword[rsp + 8 * 1], r12);
    mov(qword[rsp + 8 * 2], r13);
    mov(qword[rsp + 8 * 3], r14);
    mov(qword[rsp + 8 * 4], r15);
    sub(rsp, (uint32_t)stack_size);
    mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
    mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
    mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
    mov(rdx, qword[rcx + 8]); // membase
  }

  auto lowering_table = backend_->lowering_table();

  // Body.
  auto block = builder->first_block();
  while (block) {

@@ -144,17 +171,17 @@ int X64Emitter::Emit(HIRBuilder* builder) {
      label = label->next;
    }

    // Reset reg allocation state.
    // If we start keeping regs across blocks this needs to change.
    // We mark a few active so that the allocator doesn't use them.
    reg_state_.active_regs = reg_state_.live_regs = reserved_regs;

    // Add instructions.
    // The table will process sequences of instructions to (try to)
    // generate optimal code.
    current_instr_ = block->instr_head;
    if (lowering_table->ProcessBlock(*this, block)) {
      return 1;
    // Process instructions.
    const Instr* instr = block->instr_head;
    while (instr) {
      const Instr* new_tail = instr;
      if (!SelectSequence(*this, instr, &new_tail)) {
        // No sequence found!
        XEASSERTALWAYS();
        XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
        break;
      }
      instr = new_tail;
    }

    block = block->next;

@@ -163,12 +190,8 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  // Function epilog.
  L("epilog");
  if (emit_prolog) {
    mov(rbx, qword[rsp + 8 * 0]);
    mov(r12, qword[rsp + 8 * 1]);
    mov(r13, qword[rsp + 8 * 2]);
    mov(r14, qword[rsp + 8 * 3]);
    mov(r15, qword[rsp + 8 * 4]);
    add(rsp, stack_size);
    mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
    add(rsp, (uint32_t)stack_size);
  }
  ret();

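// Illustrative aside (not part of the commit): the XEASSERT above encodes the
// Win64 ABI rule that rsp must be 16-byte aligned at every call site. The
// caller's `call` pushed an 8-byte return address, so rsp % 16 == 8 on entry;
// subtracting a stack_size with (stack_size + 8) % 16 == 0 restores 16-byte
// alignment for the body of the function.
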
@@ -183,181 +206,398 @@ int X64Emitter::Emit(HIRBuilder* builder) {
  return 0;
}

void X64Emitter::EvictStaleRegs() {
  // NOTE: if we are getting called it's because we *need* a register.
  // We must get rid of something.

  uint32_t current_ordinal = current_instr_->ordinal;

  // Remove any register with no more uses.
  uint32_t new_live_regs = 0;
  for (size_t n = 0; n < 32; n++) {
    uint32_t bit = 1 << n;
    if (bit & reg_state_.active_regs) {
      // Register is active and cannot be freed.
      new_live_regs |= bit;
      continue;
    }
    if (!(bit & reg_state_.live_regs)) {
      // Register is not alive - nothing to do.
      continue;
    }

    // Register is live, not active. Check and see if we get rid of it.
    auto v = reg_state_.reg_values[n];
    if (v->last_use->ordinal < current_ordinal) {
      reg_state_.reg_values[n] = NULL;
    }
  }

  // Hrm. We have spilled.
  if (reg_state_.live_regs == new_live_regs) {
    XEASSERTALWAYS();
  }

  reg_state_.live_regs = new_live_regs;
}

void X64Emitter::FindFreeRegs(
    Value* v0, uint32_t& v0_idx, uint32_t v0_flags) {
  // If the value is already in a register, use it.
  if (v0->reg != -1) {
    // Already in a register. Mark active and return.
    v0_idx = v0->reg;
    reg_state_.active_regs |= 1 << v0_idx;
    return;
  }

  uint32_t avail_regs = 0;
  if (IsIntType(v0->type)) {
    if (v0_flags & REG_ABCD) {
      avail_regs = B00001111;
    } else {
      avail_regs = 0xFFFF;
    }
  } else {
    avail_regs = 0xFFFF0000;
  }
  uint32_t free_regs = avail_regs & ~reg_state_.live_regs;
  if (!free_regs) {
    // Need to evict something.
    EvictStaleRegs();
  }

  // Find the first available.
  // We start from the MSB so that we get the non-rNx regs that are often
  // in short supply.
  _BitScanReverse((DWORD*)&v0_idx, free_regs);

  reg_state_.active_regs |= 1 << v0_idx;
  reg_state_.live_regs |= 1 << v0_idx;
  v0->reg = v0_idx;
  reg_state_.reg_values[v0_idx] = v0;
}

void X64Emitter::FindFreeRegs(
    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
    Value* v1, uint32_t& v1_idx, uint32_t v1_flags) {
  // TODO(benvanik): support REG_DEST reuse/etc.
  // Grab all already-present registers first.
  // This way we won't spill them trying to get new registers.
  bool need_v0 = v0->reg == -1;
  bool need_v1 = v1->reg == -1;
  if (!need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (!need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
  // Grab any registers we still need. These calls may evict.
  if (need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
}

void X64Emitter::FindFreeRegs(
    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
    Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
    Value* v2, uint32_t& v2_idx, uint32_t v2_flags) {
  // TODO(benvanik): support REG_DEST reuse/etc.
  // Grab all already-present registers first.
  // This way we won't spill them trying to get new registers.
  bool need_v0 = v0->reg == -1;
  bool need_v1 = v1->reg == -1;
  bool need_v2 = v2->reg == -1;
  if (!need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (!need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
  if (!need_v2) {
    FindFreeRegs(v2, v2_idx, v2_flags);
  }
  // Grab any registers we still need. These calls may evict.
  if (need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
  if (need_v2) {
    FindFreeRegs(v2, v2_idx, v2_flags);
  }
}

void X64Emitter::FindFreeRegs(
    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
    Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
    Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
    Value* v3, uint32_t& v3_idx, uint32_t v3_flags) {
  // TODO(benvanik): support REG_DEST reuse/etc.
  // Grab all already-present registers first.
  // This way we won't spill them trying to get new registers.
  bool need_v0 = v0->reg == -1;
  bool need_v1 = v1->reg == -1;
  bool need_v2 = v2->reg == -1;
  bool need_v3 = v3->reg == -1;
  if (!need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (!need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
  if (!need_v2) {
    FindFreeRegs(v2, v2_idx, v2_flags);
  }
  if (!need_v3) {
    FindFreeRegs(v3, v3_idx, v3_flags);
  }
  // Grab any registers we still need. These calls may evict.
  if (need_v0) {
    FindFreeRegs(v0, v0_idx, v0_flags);
  }
  if (need_v1) {
    FindFreeRegs(v1, v1_idx, v1_flags);
  }
  if (need_v2) {
    FindFreeRegs(v2, v2_idx, v2_flags);
  }
  if (need_v3) {
    FindFreeRegs(v3, v3_idx, v3_flags);
  }
}

Instr* X64Emitter::Advance(Instr* i) {
  auto next = i->next;
  current_instr_ = next;
  return next;
}

void X64Emitter::MarkSourceOffset(Instr* i) {
void X64Emitter::MarkSourceOffset(const Instr* i) {
  auto entry = source_map_arena_.Alloc<SourceMapEntry>();
  entry->source_offset = i->src1.offset;
  entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
  entry->code_offset = getSize();
  source_map_count_++;
}

void X64Emitter::DebugBreak() {
  // TODO(benvanik): notify debugger.
  db(0xCC);
}

void X64Emitter::Trap() {
  // 0x0FE00014 is a 'debug print' where r3 = buffer r4 = length
  // TODO(benvanik): post software interrupt to debugger.
  db(0xCC);
}

void X64Emitter::UnimplementedInstr(const hir::Instr* i) {
  // TODO(benvanik): notify debugger.
  db(0xCC);
  XEASSERTALWAYS();
}

// Total size of ResolveFunctionSymbol call site in bytes.
// Used to overwrite it with nops as needed.
const size_t TOTAL_RESOLVE_SIZE = 27;
const size_t ASM_OFFSET = 2 + 2 + 8 + 2 + 8;

// Length  Assembly                                    Byte Sequence
// =================================================================================
// 2 bytes 66 NOP                                      66 90H
// 3 bytes NOP DWORD ptr [EAX]                         0F 1F 00H
// 4 bytes NOP DWORD ptr [EAX + 00H]                   0F 1F 40 00H
// 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]           0F 1F 44 00 00H
// 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]        66 0F 1F 44 00 00H
// 7 bytes NOP DWORD ptr [EAX + 00000000H]             0F 1F 80 00 00 00 00H
// 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]     0F 1F 84 00 00 00 00 00H
// 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H]  66 0F 1F 84 00 00 00 00 00H

uint64_t ResolveFunctionSymbol(void* raw_context, uint64_t symbol_info_ptr) {
  // TODO(benvanik): generate this thunk at runtime? or a shim?
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);

  // Resolve function. This will demand compile as required.
  Function* fn = NULL;
  thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn);
  XEASSERTNOTNULL(fn);
  auto x64_fn = static_cast<X64Function*>(fn);
  uint64_t addr = reinterpret_cast<uint64_t>(x64_fn->machine_code());

  // Overwrite the call site.
  // The return address points to ReloadRCX work after the call.
  uint64_t return_address = reinterpret_cast<uint64_t>(_ReturnAddress());
#pragma pack(push, 1)
  struct Asm {
    uint16_t mov_rax;
    uint64_t rax_constant;
    uint16_t mov_rdx;
    uint64_t rdx_constant;
    uint16_t call_rax;
    uint8_t mov_rcx[5];
  };
#pragma pack(pop)
  Asm* code = reinterpret_cast<Asm*>(return_address - ASM_OFFSET);
  code->rax_constant = addr;
  code->call_rax = 0x9066;

  // We need to return the target in rax so that it gets called.
  return addr;
}

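// Illustrative aside (not part of the commit): the Asm struct above mirrors
// the byte layout of the sequence emitted in Call() below:
//   mov rax, imm64   (2 + 8 bytes)
//   mov rdx, imm64   (2 + 8 bytes)
//   call rax         (2 bytes)     <- return address points just past here
//   mov rcx, [rsp+n] (5 bytes, the ReloadECX)
// so ASM_OFFSET = 2 + 2 + 8 + 2 + 8 = 22 walks back from the return address
// to the first mov, and TOTAL_RESOLVE_SIZE = 22 + 5 = 27. Storing 0x9066 over
// call_rax writes bytes 66 90 (the 2-byte NOP), so later executions skip the
// resolver and fall through with rax already holding the patched target.
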
void X64Emitter::Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info) {
  auto fn = reinterpret_cast<X64Function*>(symbol_info->function());
  // Resolve address to the function to call and store in rax.
  // TODO(benvanik): caching/etc. For now this makes debugging easier.
  if (fn) {
    mov(rax, reinterpret_cast<uint64_t>(fn->machine_code()));
  } else {
    size_t start = getSize();
    // 2b + 8b constant
    mov(rax, reinterpret_cast<uint64_t>(ResolveFunctionSymbol));
    // 2b + 8b constant
    mov(rdx, reinterpret_cast<uint64_t>(symbol_info));
    // 2b
    call(rax);
    // 5b
    ReloadECX();
    size_t total_size = getSize() - start;
    XEASSERT(total_size == TOTAL_RESOLVE_SIZE);
    // EDX overwritten, don't bother reloading.
  }

  // Actually jump/call to rax.
  if (instr->flags & CALL_TAIL) {
    // Pass the caller's return address over.
    mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);

    add(rsp, static_cast<uint32_t>(stack_size()));
    jmp(rax);
  } else {
    // Return address is from the previous SET_RETURN_ADDRESS.
    mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
    call(rax);
  }
}

uint64_t ResolveFunctionAddress(void* raw_context, uint64_t target_address) {
  // TODO(benvanik): generate this thunk at runtime? or a shim?
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);

  // TODO(benvanik): required?
  target_address &= 0xFFFFFFFF;

  Function* fn = NULL;
  thread_state->runtime()->ResolveFunction(target_address, &fn);
  XEASSERTNOTNULL(fn);
  auto x64_fn = static_cast<X64Function*>(fn);
  return reinterpret_cast<uint64_t>(x64_fn->machine_code());
}

void X64Emitter::CallIndirect(const hir::Instr* instr, const Reg64& reg) {
  // Check if return.
  if (instr->flags & CALL_POSSIBLE_RETURN) {
    cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
    je("epilog", CodeGenerator::T_NEAR);
  }

  // Resolve address to the function to call and store in rax.
  // TODO(benvanik): caching/etc. For now this makes debugging easier.
  if (reg.getIdx() != rdx.getIdx()) {
    mov(rdx, reg);
  }
  CallNative(ResolveFunctionAddress);

  // Actually jump/call to rax.
  if (instr->flags & CALL_TAIL) {
    // Pass the caller's return address over.
    mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]);

    add(rsp, static_cast<uint32_t>(stack_size()));
    jmp(rax);
  } else {
    // Return address is from the previous SET_RETURN_ADDRESS.
    mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
    call(rax);
  }
}

uint64_t UndefinedCallExtern(void* raw_context, uint64_t symbol_info_ptr) {
  auto symbol_info = reinterpret_cast<FunctionInfo*>(symbol_info_ptr);
  XELOGW("undefined extern call to %.8X %s",
         symbol_info->address(),
         symbol_info->name());
  return 0;
}
void X64Emitter::CallExtern(const hir::Instr* instr, const FunctionInfo* symbol_info) {
  XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN);
  if (!symbol_info->extern_handler()) {
    CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(symbol_info));
  } else {
    // rcx = context
    // rdx = target host function
    // r8  = arg0
    // r9  = arg1
    mov(rdx, reinterpret_cast<uint64_t>(symbol_info->extern_handler()));
    mov(r8, reinterpret_cast<uint64_t>(symbol_info->extern_arg0()));
    mov(r9, reinterpret_cast<uint64_t>(symbol_info->extern_arg1()));
    auto thunk = backend()->guest_to_host_thunk();
    mov(rax, reinterpret_cast<uint64_t>(thunk));
    call(rax);
    ReloadECX();
    ReloadEDX();
    // rax = host return
  }
}

void X64Emitter::CallNative(void* fn) {
  mov(rax, reinterpret_cast<uint64_t>(fn));
  call(rax);
  ReloadECX();
  ReloadEDX();
}

void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context)) {
  mov(rax, reinterpret_cast<uint64_t>(fn));
  call(rax);
  ReloadECX();
  ReloadEDX();
}

void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)) {
  mov(rax, reinterpret_cast<uint64_t>(fn));
  call(rax);
  ReloadECX();
  ReloadEDX();
}

void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) {
  mov(rdx, arg0);
  mov(rax, reinterpret_cast<uint64_t>(fn));
  call(rax);
  ReloadECX();
  ReloadEDX();
}

void X64Emitter::CallNativeSafe(void* fn) {
  // rcx = context
  // rdx = target host function
  // r8  = arg0
  // r9  = arg1
  mov(rdx, reinterpret_cast<uint64_t>(fn));
  auto thunk = backend()->guest_to_host_thunk();
  mov(rax, reinterpret_cast<uint64_t>(thunk));
  call(rax);
  ReloadECX();
  ReloadEDX();
  // rax = host return
}

void X64Emitter::SetReturnAddress(uint64_t value) {
  mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], value);
}

void X64Emitter::ReloadECX() {
  mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
}

void X64Emitter::ReloadEDX() {
  mov(rdx, qword[rcx + 8]); // membase
}

void X64Emitter::LoadEflags() {
#if STORE_EFLAGS
  mov(eax, dword[rsp + STASH_OFFSET]);
  push(rax);
  popf();
#else
  // EFLAGS already present.
#endif  // STORE_EFLAGS
}

void X64Emitter::StoreEflags() {
#if STORE_EFLAGS
  pushf();
  pop(qword[rsp + STASH_OFFSET]);
#else
  // EFLAGS should have CA set?
  // (so long as we don't fuck with it)
#endif  // STORE_EFLAGS
}

uint32_t X64Emitter::page_table_address() const {
  uint64_t addr = runtime_->memory()->page_table();
  return static_cast<uint32_t>(addr);
}

bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
  if ((v & ~0x7FFFFFFF) == 0) {
    // Fits under 31 bits, so just load using normal mov.
    return true;
  } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
    // Negative number that fits in 32bits.
    return true;
  }
  return false;
}

void X64Emitter::MovMem64(const RegExp& addr, uint64_t v) {
  if ((v & ~0x7FFFFFFF) == 0) {
    // Fits under 31 bits, so just load using normal mov.
    mov(qword[addr], v);
  } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
    // Negative number that fits in 32bits.
    mov(qword[addr], v);
  } else if (!(v >> 32)) {
    // All high bits are zero. It'd be nice if we had a way to load a 32bit
    // immediate without sign extending!
    // TODO(benvanik): this is super common, find a better way.
    mov(dword[addr], static_cast<uint32_t>(v));
    mov(dword[addr + 4], 0);
  } else {
    // 64bit number that needs double movs.
    mov(dword[addr], static_cast<uint32_t>(v));
    mov(dword[addr + 4], static_cast<uint32_t>(v >> 32));
  }
}

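// Illustrative aside (not part of the commit): x86-64 has no mov m64, imm64
// encoding - a qword store only accepts a sign-extended imm32 - which is why
// MovMem64 above splits anything whose top 33 bits are not all zero or all
// one into two dword stores, e.g. v = 0x100000000 writes 0 at [addr] and 1 at
// [addr + 4].
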
Address X64Emitter::GetXmmConstPtr(XmmConst id) {
  static const vec128_t xmm_consts[] = {
    /* XMMZero                */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
    /* XMMOne                 */ vec128f(1.0f, 1.0f, 1.0f, 1.0f),
    /* XMMNegativeOne         */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
    /* XMMMaskX16Y16          */ vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, 0x00000000u),
    /* XMMFlipX16Y16          */ vec128i(0x00008000u, 0x00000000u, 0x00000000u, 0x00000000u),
    /* XMMFixX16Y16           */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
    /* XMMNormalizeX16Y16     */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
    /* XMM0001                */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
    /* XMM3301                */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
    /* XMMSignMaskPS          */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
    /* XMMSignMaskPD          */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
    /* XMMAbsMaskPS           */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
    /* XMMAbsMaskPD           */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
    /* XMMByteSwapMask        */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
    /* XMMPermuteControl15    */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
    /* XMMPackD3DCOLOR        */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
    /* XMMUnpackD3DCOLOR      */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, 0xFFFFFF0Cu, 0xFFFFFF0Fu),
    /* XMMOneOver255          */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
    /* XMMShiftMaskPS         */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
    /* XMMShiftByteMask       */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
    /* XMMUnsignedDwordMax    */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
    /* XMM255                 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
    /* XMMSignMaskI8          */ vec128i(0x80808080u, 0x80808080u, 0x80808080u, 0x80808080u),
    /* XMMSignMaskI16         */ vec128i(0x80008000u, 0x80008000u, 0x80008000u, 0x80008000u),
    /* XMMSignMaskI32         */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
    /* XMMSignMaskF32         */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
  };
  // TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
  //     prevent this move.
  // TODO(benvanik): move to predictable location in PPCContext? could then
  //     just do rcx relative addressing with no rax overwriting.
  mov(rax, (uint64_t)&xmm_consts[id]);
  return ptr[rax];
}

void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
  // http://www.agner.org/optimize/optimizing_assembly.pdf
  // 13.4 Generating constants
  if (!v.low && !v.high) {
    // 0000...
    vpxor(dest, dest);
  } else if (v.low == ~0ull && v.high == ~0ull) {
    // 1111...
    vpcmpeqb(dest, dest);
  } else {
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    MovMem64(rsp + STASH_OFFSET, v.low);
    MovMem64(rsp + STASH_OFFSET + 8, v.high);
    vmovdqa(dest, ptr[rsp + STASH_OFFSET]);
  }
}

void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) {
  union {
    float f;
    uint32_t i;
  } x = { v };
  if (!v) {
    // 0
    vpxor(dest, dest);
  } else if (x.i == ~0UL) {
    // 1111...
    vpcmpeqb(dest, dest);
  } else {
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(eax, x.i);
    vmovd(dest, eax);
  }
}

void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) {
  union {
    double d;
    uint64_t i;
  } x = { v };
  if (!v) {
    // 0
    vpxor(dest, dest);
  } else if (x.i == ~0ULL) {
    // 1111...
    vpcmpeqb(dest, dest);
  } else {
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(rax, x.i);
    vmovq(dest, rax);
  }
}

Address X64Emitter::StashXmm(const Xmm& r) {
  auto addr = ptr[rsp + STASH_OFFSET];
  vmovups(addr, r);
  return addr;
}

Address X64Emitter::StashXmm(const vec128_t& v) {
  auto addr = ptr[rsp + STASH_OFFSET];
  LoadConstantXmm(xmm0, v);
  vmovups(addr, xmm0);
  return addr;
}

@@ -19,6 +19,9 @@
XEDECLARECLASS2(alloy, hir, HIRBuilder);
XEDECLARECLASS2(alloy, hir, Instr);
XEDECLARECLASS2(alloy, runtime, DebugInfo);
XEDECLARECLASS2(alloy, runtime, FunctionInfo);
XEDECLARECLASS2(alloy, runtime, Runtime);
XEDECLARECLASS2(alloy, runtime, SymbolInfo);

namespace alloy {
namespace backend {

@@ -32,6 +35,35 @@ enum RegisterFlags {
  REG_ABCD = (1 << 1),
};

enum XmmConst {
  XMMZero = 0,
  XMMOne,
  XMMNegativeOne,
  XMMMaskX16Y16,
  XMMFlipX16Y16,
  XMMFixX16Y16,
  XMMNormalizeX16Y16,
  XMM0001,
  XMM3301,
  XMMSignMaskPS,
  XMMSignMaskPD,
  XMMAbsMaskPS,
  XMMAbsMaskPD,
  XMMByteSwapMask,
  XMMPermuteControl15,
  XMMPackD3DCOLOR,
  XMMUnpackD3DCOLOR,
  XMMOneOver255,
  XMMShiftMaskPS,
  XMMShiftByteMask,
  XMMUnsignedDwordMax,
  XMM255,
  XMMSignMaskI8,
  XMMSignMaskI16,
  XMMSignMaskI32,
  XMMSignMaskF32,
};

// Unfortunately due to the design of xbyak we have to pass this to the ctor.
class XbyakAllocator : public Xbyak::Allocator {
public:

@@ -43,6 +75,9 @@ public:
  X64Emitter(X64Backend* backend, XbyakAllocator* allocator);
  virtual ~X64Emitter();

  runtime::Runtime* runtime() const { return runtime_; }
  X64Backend* backend() const { return backend_; }

  int Initialize();

  int Emit(hir::HIRBuilder* builder,

@@ -50,118 +85,93 @@ public:
           void*& out_code_address, size_t& out_code_size);

public:
  template<typename V0>
  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
    uint32_t v0_idx;
    FindFreeRegs(v0, v0_idx, r0_flags);
    SetupReg(v0_idx, r0);
  // Reserved:  rsp
  // Scratch:   rax/rcx/rdx
  //            xmm0-2 (could be only xmm0 with some trickery)
  // Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?)
  //            xmm6-xmm15 (save to get xmm3-xmm5)
  static const int GPR_COUNT = 5;
  static const int XMM_COUNT = 10;

  static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) {
    auto idx = gpr_reg_map_[v->reg.index];
    r = Xbyak::Reg8(idx);
  }
  template<typename V0, typename V1>
  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
               hir::Value* v1, V1& r1, uint32_t r1_flags) {
    uint32_t v0_idx, v1_idx;
    FindFreeRegs(v0, v0_idx, r0_flags,
                 v1, v1_idx, r1_flags);
    SetupReg(v0_idx, r0);
    SetupReg(v1_idx, r1);
  static void SetupReg(const hir::Value* v, Xbyak::Reg16& r) {
    auto idx = gpr_reg_map_[v->reg.index];
    r = Xbyak::Reg16(idx);
  }
  template<typename V0, typename V1, typename V2>
  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
               hir::Value* v1, V1& r1, uint32_t r1_flags,
               hir::Value* v2, V2& r2, uint32_t r2_flags) {
    uint32_t v0_idx, v1_idx, v2_idx;
    FindFreeRegs(v0, v0_idx, r0_flags,
                 v1, v1_idx, r1_flags,
                 v2, v2_idx, r2_flags);
    SetupReg(v0_idx, r0);
    SetupReg(v1_idx, r1);
    SetupReg(v2_idx, r2);
  static void SetupReg(const hir::Value* v, Xbyak::Reg32& r) {
    auto idx = gpr_reg_map_[v->reg.index];
    r = Xbyak::Reg32(idx);
  }
  template<typename V0, typename V1, typename V2, typename V3>
  void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
               hir::Value* v1, V1& r1, uint32_t r1_flags,
               hir::Value* v2, V2& r2, uint32_t r2_flags,
               hir::Value* v3, V3& r3, uint32_t r3_flags) {
    uint32_t v0_idx, v1_idx, v2_idx, v3_idx;
    FindFreeRegs(v0, v0_idx, r0_flags,
                 v1, v1_idx, r1_flags,
                 v2, v2_idx, r2_flags,
                 v3, v3_idx, r3_flags);
    SetupReg(v0_idx, r0);
    SetupReg(v1_idx, r1);
    SetupReg(v2_idx, r2);
    SetupReg(v3_idx, r3);
  static void SetupReg(const hir::Value* v, Xbyak::Reg64& r) {
    auto idx = gpr_reg_map_[v->reg.index];
    r = Xbyak::Reg64(idx);
  }
  template<typename V0>
  void EndOp(V0& r0) {
    reg_state_.active_regs = reg_state_.active_regs ^ GetRegBit(r0);
  }
  template<typename V0, typename V1>
  void EndOp(V0& r0, V1& r1) {
    reg_state_.active_regs = reg_state_.active_regs ^ (
        GetRegBit(r0) | GetRegBit(r1));
  }
  template<typename V0, typename V1, typename V2>
  void EndOp(V0& r0, V1& r1, V2& r2) {
    reg_state_.active_regs = reg_state_.active_regs ^ (
        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2));
  }
  template<typename V0, typename V1, typename V2, typename V3>
  void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
    reg_state_.active_regs = reg_state_.active_regs ^ (
        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3));
  static void SetupReg(const hir::Value* v, Xbyak::Xmm& r) {
    auto idx = xmm_reg_map_[v->reg.index];
    r = Xbyak::Xmm(idx);
  }

  void EvictStaleRegs();
  void MarkSourceOffset(const hir::Instr* i);

  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags);
  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags);
  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags);
  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
                    hir::Value* v3, uint32_t& v3_idx, uint32_t v3_flags);
  void DebugBreak();
  void Trap();
  void UnimplementedInstr(const hir::Instr* i);
  void UnimplementedExtern(const hir::Instr* i);

  static void SetupReg(uint32_t idx, Xbyak::Reg8& r) { r = Xbyak::Reg8(idx); }
  static void SetupReg(uint32_t idx, Xbyak::Reg16& r) { r = Xbyak::Reg16(idx); }
  static void SetupReg(uint32_t idx, Xbyak::Reg32& r) { r = Xbyak::Reg32(idx); }
  static void SetupReg(uint32_t idx, Xbyak::Reg64& r) { r = Xbyak::Reg64(idx); }
  static void SetupReg(uint32_t idx, Xbyak::Xmm& r) { r = Xbyak::Xmm(idx - 16); }
  static uint32_t GetRegBit(const Xbyak::Reg8& r) { return 1 << r.getIdx(); }
  static uint32_t GetRegBit(const Xbyak::Reg16& r) { return 1 << r.getIdx(); }
  static uint32_t GetRegBit(const Xbyak::Reg32& r) { return 1 << r.getIdx(); }
  static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); }
  static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); }
  void Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info);
  void CallIndirect(const hir::Instr* instr, const Xbyak::Reg64& reg);
  void CallExtern(const hir::Instr* instr, const runtime::FunctionInfo* symbol_info);
  void CallNative(void* fn);
  void CallNative(uint64_t(*fn)(void* raw_context));
  void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0));
  void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0);
  void CallNativeSafe(void* fn);
  void SetReturnAddress(uint64_t value);
  void ReloadECX();
  void ReloadEDX();

  hir::Instr* Advance(hir::Instr* i);
  // TODO(benvanik): Label for epilog (don't use strings).

  void MarkSourceOffset(hir::Instr* i);
  void LoadEflags();
  void StoreEflags();

private:
  void* Emplace(X64CodeCache* code_cache);
  int Emit(hir::HIRBuilder* builder);
  uint32_t page_table_address() const;

private:
  X64Backend* backend_;
  X64CodeCache* code_cache_;
  XbyakAllocator* allocator_;
  // Moves a 64bit immediate into memory.
  bool ConstantFitsIn32Reg(uint64_t v);
  void MovMem64(const Xbyak::RegExp& addr, uint64_t v);

  Xbyak::Address GetXmmConstPtr(XmmConst id);
  void LoadConstantXmm(Xbyak::Xmm dest, float v);
  void LoadConstantXmm(Xbyak::Xmm dest, double v);
  void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
  Xbyak::Address StashXmm(const Xbyak::Xmm& r);
  Xbyak::Address StashXmm(const vec128_t& v);

  size_t stack_size() const { return stack_size_; }

protected:
  void* Emplace(size_t stack_size);
  int Emit(hir::HIRBuilder* builder, size_t& out_stack_size);

protected:
  runtime::Runtime* runtime_;
  X64Backend* backend_;
  X64CodeCache* code_cache_;
  XbyakAllocator* allocator_;

  struct {
    // Registers currently active within a begin/end op block. These
    // cannot be reused.
    uint32_t active_regs;
    // Registers with values in them.
    uint32_t live_regs;
    // Current register values.
    hir::Value* reg_values[32];
  } reg_state_;
  hir::Instr* current_instr_;

  size_t source_map_count_;
  Arena source_map_arena_;

  size_t stack_size_;

  static const uint32_t gpr_reg_map_[GPR_COUNT];
  static const uint32_t xmm_reg_map_[XMM_COUNT];
};

@@ -10,6 +10,7 @@
#include <alloy/backend/x64/x64_function.h>

#include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/thread_state.h>

@@ -21,7 +22,7 @@ using namespace alloy::runtime;

X64Function::X64Function(FunctionInfo* symbol_info) :
    machine_code_(NULL), code_size_(0),
    GuestFunction(symbol_info) {
    Function(symbol_info) {
}

X64Function::~X64Function() {

@@ -41,8 +42,12 @@ int X64Function::RemoveBreakpointImpl(Breakpoint* breakpoint) {
  return 0;
}

int X64Function::CallImpl(ThreadState* thread_state) {
  typedef void(*call_t)(void* raw_context, uint8_t* membase);
  ((call_t)machine_code_)(thread_state->raw_context(), thread_state->memory()->membase());
int X64Function::CallImpl(ThreadState* thread_state, uint64_t return_address) {
  auto backend = (X64Backend*)thread_state->runtime()->backend();
  auto thunk = backend->host_to_guest_thunk();
  thunk(
      machine_code_,
      thread_state->raw_context(),
      (void*)return_address);
  return 0;
}

@@ -20,17 +20,21 @@ namespace backend {
namespace x64 {


class X64Function : public runtime::GuestFunction {
class X64Function : public runtime::Function {
public:
  X64Function(runtime::FunctionInfo* symbol_info);
  virtual ~X64Function();

  void* machine_code() const { return machine_code_; }
  size_t code_size() const { return code_size_; }

  void Setup(void* machine_code, size_t code_size);

protected:
  virtual int AddBreakpointImpl(runtime::Breakpoint* breakpoint);
  virtual int RemoveBreakpointImpl(runtime::Breakpoint* breakpoint);
  virtual int CallImpl(runtime::ThreadState* thread_state);
  virtual int CallImpl(runtime::ThreadState* thread_state,
                       uint64_t return_address);

private:
  void* machine_code_;

@@ -0,0 +1,744 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */


namespace {

enum KeyType {
  KEY_TYPE_X = OPCODE_SIG_TYPE_X,
  KEY_TYPE_L = OPCODE_SIG_TYPE_L,
  KEY_TYPE_O = OPCODE_SIG_TYPE_O,
  KEY_TYPE_S = OPCODE_SIG_TYPE_S,
  KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
  KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
  KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
  KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
  KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
  KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
  KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
};

#pragma pack(push, 1)
union InstrKey {
  struct {
    uint32_t opcode : 8;
    uint32_t dest : 5;
    uint32_t src1 : 5;
    uint32_t src2 : 5;
    uint32_t src3 : 5;
    uint32_t reserved : 4;
  };
  uint32_t value;

  operator uint32_t() const {
    return value;
  }

  InstrKey() : value(0) {}
  InstrKey(uint32_t v) : value(v) {}
  InstrKey(const Instr* i) : value(0) {
    opcode = i->opcode->num;
    uint32_t sig = i->opcode->signature;
    dest = GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
    src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
    if (src1 == OPCODE_SIG_TYPE_V) {
      src1 += i->src1.value->type;
    }
    src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
    if (src2 == OPCODE_SIG_TYPE_V) {
      src2 += i->src2.value->type;
    }
    src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
    if (src3 == OPCODE_SIG_TYPE_V) {
      src3 += i->src3.value->type;
    }
  }

  template <Opcode OPCODE,
            KeyType DEST = KEY_TYPE_X,
            KeyType SRC1 = KEY_TYPE_X,
            KeyType SRC2 = KEY_TYPE_X,
            KeyType SRC3 = KEY_TYPE_X>
  struct Construct {
    static const uint32_t value =
        (OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
  };
};
#pragma pack(pop)
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
||||
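The Construct template and the InstrKey(const Instr*) constructor must agree on the packing, since one builds table keys at compile time and the other hashes live HIR instructions. A worked example for a 32-bit integer add, "v0.i32 = add v1.i32, v2.i32" (all names from the enums above):

    // dest/src1/src2 are all KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE;
    // src3 is unused (KEY_TYPE_X). Both paths produce the same 32-bit key:
    uint32_t key = InstrKey::Construct<
        OPCODE_ADD, KEY_TYPE_V_I32, KEY_TYPE_V_I32, KEY_TYPE_V_I32>::value;
    // == OPCODE_ADD | (KEY_TYPE_V_I32 << 8) | (KEY_TYPE_V_I32 << 13)
    //              | (KEY_TYPE_V_I32 << 18)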
template <typename... Ts>
struct CombinedStruct;
template <>
struct CombinedStruct<> {};
template <typename T, typename... Ts>
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};

struct OpBase {};

template <typename T, KeyType KEY_TYPE>
struct Op : OpBase {
  static const KeyType key_type = KEY_TYPE;
};

struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;
  void Load(const Instr::Op& op) {}
};

struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
  uint64_t value;
protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;
  void Load(const Instr::Op& op) {
    this->value = op.offset;
  }
};

struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
  FunctionInfo* value;
protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;
  bool Load(const Instr::Op& op) {
    this->value = op.symbol_info;
    return true;
  }
};

struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
  hir::Label* value;
protected:
  template <typename T, KeyType KEY_TYPE> friend struct Op;
  template <hir::Opcode OPCODE, typename... Ts> friend struct I;
  void Load(const Instr::Op& op) {
    this->value = op.label;
  }
};

template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE, int TAG = -1>
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE, TAG>, KEY_TYPE> {
  typedef REG_TYPE reg_type;
  static const int tag = TAG;
  const Value* value;
  bool is_constant;
  virtual bool ConstantFitsIn32Reg() const { return true; }
  const REG_TYPE& reg() const {
    XEASSERT(!is_constant);
    return reg_;
  }
  operator const REG_TYPE&() const {
    return reg();
  }
  bool IsEqual(const T& b) const {
    if (is_constant && b.is_constant) {
      return reinterpret_cast<const T*>(this)->constant() == b.constant();
    } else if (!is_constant && !b.is_constant) {
      return reg_.getIdx() == b.reg_.getIdx();
    } else {
      return false;
    }
  }
  bool IsEqual(const Xbyak::Reg& b) const {
    if (is_constant) {
      return false;
    } else if (!is_constant) {
      return reg_.getIdx() == b.getIdx();
    } else {
      return false;
    }
  }
  bool operator== (const T& b) const {
    return IsEqual(b);
  }
  bool operator!= (const T& b) const {
    return !IsEqual(b);
  }
  bool operator== (const Xbyak::Reg& b) const {
    return IsEqual(b);
  }
  bool operator!= (const Xbyak::Reg& b) const {
    return !IsEqual(b);
  }
  void Load(const Instr::Op& op) {
    const Value* value = op.value;
    this->value = value;
    is_constant = value->IsConstant();
    if (!is_constant) {
      X64Emitter::SetupReg(value, reg_);
    }
  }
protected:
  REG_TYPE reg_;
};

template <int TAG = -1>
struct I8 : ValueOp<I8<TAG>, KEY_TYPE_V_I8, Reg8, int8_t, TAG> {
  const int8_t constant() const {
    XEASSERT(is_constant);
    return value->constant.i8;
  }
};
template <int TAG = -1>
struct I16 : ValueOp<I16<TAG>, KEY_TYPE_V_I16, Reg16, int16_t, TAG> {
  const int16_t constant() const {
    XEASSERT(is_constant);
    return value->constant.i16;
  }
};
template <int TAG = -1>
struct I32 : ValueOp<I32<TAG>, KEY_TYPE_V_I32, Reg32, int32_t, TAG> {
  const int32_t constant() const {
    XEASSERT(is_constant);
    return value->constant.i32;
  }
};
template <int TAG = -1>
struct I64 : ValueOp<I64<TAG>, KEY_TYPE_V_I64, Reg64, int64_t, TAG> {
  const int64_t constant() const {
    XEASSERT(is_constant);
    return value->constant.i64;
  }
  bool ConstantFitsIn32Reg() const override {
    int64_t v = value->constant.i64;
    if ((v & ~0x7FFFFFFF) == 0) {
      // Fits under 31 bits, so just load using normal mov.
      return true;
    } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
      // Negative number that fits in 32bits.
      return true;
    }
    return false;
  }
};
template <int TAG = -1>
struct F32 : ValueOp<F32<TAG>, KEY_TYPE_V_F32, Xmm, float, TAG> {
  const float constant() const {
    XEASSERT(is_constant);
    return value->constant.f32;
  }
};
template <int TAG = -1>
struct F64 : ValueOp<F64<TAG>, KEY_TYPE_V_F64, Xmm, double, TAG> {
  const double constant() const {
    XEASSERT(is_constant);
    return value->constant.f64;
  }
};
template <int TAG = -1>
struct V128 : ValueOp<V128<TAG>, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> {
  const vec128_t& constant() const {
    XEASSERT(is_constant);
    return value->constant.v128;
  }
};

struct TagTable {
  struct {
    bool valid;
    Instr::Op op;
  } table[16];

  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_X>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_L>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_O>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  template <typename T, typename std::enable_if<T::key_type == KEY_TYPE_S>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    return true;
  }
  template <typename T, typename std::enable_if<T::key_type >= KEY_TYPE_V_I8>::type* = nullptr>
  bool CheckTag(const Instr::Op& op) {
    const Value* value = op.value;
    if (T::tag == -1) {
      return true;
    }
    if (table[T::tag].valid &&
        table[T::tag].op.value != value) {
      return false;
    }
    table[T::tag].valid = true;
    table[T::tag].op.value = (Value*)value;
    return true;
  }
};

template <typename DEST, typename... Tf>
struct DestField;
template <typename DEST>
struct DestField<DEST> {
  DEST dest;
protected:
  bool LoadDest(const Instr* i, TagTable& tag_table) {
    Instr::Op op;
    op.value = i->dest;
    if (tag_table.CheckTag<DEST>(op)) {
      dest.Load(op);
      return true;
    }
    return false;
  }
};
template <>
struct DestField<VoidOp> {
protected:
  bool LoadDest(const Instr* i, TagTable& tag_table) {
    return true;
  }
};

template <hir::Opcode OPCODE, typename... Ts>
struct I;
template <hir::Opcode OPCODE, typename DEST>
struct I<OPCODE, DEST> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  const Instr* instr;
protected:
  template <typename... Ti> friend struct SequenceFields;
  bool Load(const Instr* i, TagTable& tag_table) {
    if (InstrKey(i).value == key &&
        LoadDest(i, tag_table)) {
      instr = i;
      return true;
    }
    return false;
  }
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  const Instr* instr;
  SRC1 src1;
protected:
  template <typename... Ti> friend struct SequenceFields;
  bool Load(const Instr* i, TagTable& tag_table) {
    if (InstrKey(i).value == key &&
        LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1)) {
      instr = i;
      src1.Load(i->src1);
      return true;
    }
    return false;
  }
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  static const KeyType src2_type = SRC2::key_type;
  const Instr* instr;
  SRC1 src1;
  SRC2 src2;
protected:
  template <typename... Ti> friend struct SequenceFields;
  bool Load(const Instr* i, TagTable& tag_table) {
    if (InstrKey(i).value == key &&
        LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1) &&
        tag_table.CheckTag<SRC2>(i->src2)) {
      instr = i;
      src1.Load(i->src1);
      src2.Load(i->src2);
      return true;
    }
    return false;
  }
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2, typename SRC3>
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
  static const hir::Opcode opcode = OPCODE;
  static const uint32_t key = InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type, SRC2::key_type, SRC3::key_type>::value;
  static const KeyType dest_type = DEST::key_type;
  static const KeyType src1_type = SRC1::key_type;
  static const KeyType src2_type = SRC2::key_type;
  static const KeyType src3_type = SRC3::key_type;
  const Instr* instr;
  SRC1 src1;
  SRC2 src2;
  SRC3 src3;
protected:
  template <typename... Ti> friend struct SequenceFields;
  bool Load(const Instr* i, TagTable& tag_table) {
    if (InstrKey(i).value == key &&
        LoadDest(i, tag_table) &&
        tag_table.CheckTag<SRC1>(i->src1) &&
        tag_table.CheckTag<SRC2>(i->src2) &&
        tag_table.CheckTag<SRC3>(i->src3)) {
      instr = i;
      src1.Load(i->src1);
      src2.Load(i->src2);
      src3.Load(i->src3);
      return true;
    }
    return false;
  }
};

template <typename... Ti>
struct SequenceFields;
template <typename I1>
struct SequenceFields<I1> {
  I1 i1;
  typedef I1 I1Type;
protected:
  template <typename SEQ, typename... Ti> friend struct Sequence;
  bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
    if (i1.Load(i, tag_table)) {
      *new_tail = i->next;
      return true;
    }
    return false;
  }
};
template <typename I1, typename I2>
struct SequenceFields<I1, I2> : SequenceFields<I1> {
  I2 i2;
protected:
  template <typename SEQ, typename... Ti> friend struct Sequence;
  bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
    if (SequenceFields<I1>::Check(i, tag_table, new_tail)) {
      auto ni = i->next;
      if (ni && i2.Load(ni, tag_table)) {
        *new_tail = ni;
        return true;
      }
    }
    return false;
  }
};
template <typename I1, typename I2, typename I3>
struct SequenceFields<I1, I2, I3> : SequenceFields<I1, I2> {
  I3 i3;
protected:
  template <typename SEQ, typename... Ti> friend struct Sequence;
  bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
    if (SequenceFields<I1, I2>::Check(i, tag_table, new_tail)) {
      auto ni = i->next;
      if (ni && i3.Load(ni, tag_table)) {
        *new_tail = ni;
        return true;
      }
    }
    return false;
  }
};
template <typename I1, typename I2, typename I3, typename I4>
struct SequenceFields<I1, I2, I3, I4> : SequenceFields<I1, I2, I3> {
  I4 i4;
protected:
  template <typename SEQ, typename... Ti> friend struct Sequence;
  bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
    if (SequenceFields<I1, I2, I3>::Check(i, tag_table, new_tail)) {
      auto ni = i->next;
      if (ni && i4.Load(ni, tag_table)) {
        *new_tail = ni;
        return true;
      }
    }
    return false;
  }
};
template <typename I1, typename I2, typename I3, typename I4, typename I5>
struct SequenceFields<I1, I2, I3, I4, I5> : SequenceFields<I1, I2, I3, I4> {
  I5 i5;
protected:
  template <typename SEQ, typename... Ti> friend struct Sequence;
  bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) {
    if (SequenceFields<I1, I2, I3, I4>::Check(i, tag_table, new_tail)) {
      auto ni = i->next;
      if (ni && i5.Load(ni, tag_table)) {
        *new_tail = ni;
        return true;
      }
    }
    return false;
  }
};

template <typename SEQ, typename... Ti>
struct Sequence {
  struct EmitArgs : SequenceFields<Ti...> {};

  static bool Select(X64Emitter& e, const Instr* i, const Instr** new_tail) {
    EmitArgs args;
    TagTable tag_table;
    if (!args.Check(i, tag_table, new_tail)) {
      return false;
    }
    SEQ::Emit(e, args);
    return true;
  }
};

template <typename T>
const T GetTempReg(X64Emitter& e);
template <>
const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
  return e.al;
}
template <>
const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
  return e.ax;
}
template <>
const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
  return e.eax;
}
template <>
const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
  return e.rax;
}

template <typename SEQ, typename T>
struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
  typedef T EmitArgType;
  static const uint32_t head_key = T::key;
  static void Emit(X64Emitter& e, const EmitArgs& _) {
    SEQ::Emit(e, _.i1);
  }

  template <typename REG_FN>
  static void EmitUnaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_FN& reg_fn) {
    if (i.src1.is_constant) {
      e.mov(i.dest, i.src1.constant());
      reg_fn(e, i.dest);
    } else {
      if (i.dest != i.src1) {
        e.mov(i.dest, i.src1);
      }
      reg_fn(e, i.dest);
    }
  }

  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitCommutativeBinaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.dest == i.src2) {
        if (i.src1.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
        } else {
          auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
          e.mov(temp, i.src1.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2);
      }
    } else if (i.src2.is_constant) {
      if (i.dest == i.src1) {
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src2.constant());
        reg_reg_fn(e, i.dest, i.src1);
      }
    } else {
      if (i.dest == i.src1) {
        reg_reg_fn(e, i.dest, i.src2);
      } else if (i.dest == i.src2) {
        reg_reg_fn(e, i.dest, i.src1);
      } else {
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, i.src2);
      }
    }
  }
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitAssociativeBinaryOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.dest == i.src2) {
        auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2);
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, temp);
      } else {
        e.mov(i.dest, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2);
      }
    } else if (i.src2.is_constant) {
      if (i.dest == i.src1) {
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      } else {
        e.mov(i.dest, i.src1);
        if (i.src2.ConstantFitsIn32Reg()) {
          reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
        } else {
          auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
          e.mov(temp, i.src2.constant());
          reg_reg_fn(e, i.dest, temp);
        }
      }
    } else {
      if (i.dest == i.src1) {
        reg_reg_fn(e, i.dest, i.src2);
      } else if (i.dest == i.src2) {
        auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2);
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, temp);
      } else {
        e.mov(i.dest, i.src1);
        reg_reg_fn(e, i.dest, i.src2);
      }
    }
  }

  template <typename FN>
  static void EmitCommutativeBinaryXmmOp(
      X64Emitter& e, const EmitArgType& i, const FN& fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      e.LoadConstantXmm(e.xmm0, i.src1.constant());
      fn(e, i.dest, e.xmm0, i.src2);
    } else if (i.src2.is_constant) {
      e.LoadConstantXmm(e.xmm0, i.src2.constant());
      fn(e, i.dest, i.src1, e.xmm0);
    } else {
      fn(e, i.dest, i.src1, i.src2);
    }
  }

  template <typename FN>
  static void EmitAssociativeBinaryXmmOp(
      X64Emitter& e, const EmitArgType& i, const FN& fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      e.LoadConstantXmm(e.xmm0, i.src1.constant());
      fn(e, i.dest, e.xmm0, i.src2);
    } else if (i.src2.is_constant) {
      e.LoadConstantXmm(e.xmm0, i.src2.constant());
      fn(e, i.dest, i.src1, e.xmm0);
    } else {
      fn(e, i.dest, i.src1, i.src2);
    }
  }

  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitCommutativeCompareOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.src1.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
      } else {
        auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
        e.mov(temp, i.src1.constant());
        reg_reg_fn(e, i.src2, temp);
      }
    } else if (i.src2.is_constant) {
      if (i.src2.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
      } else {
        auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2.constant());
        reg_reg_fn(e, i.src1, temp);
      }
    } else {
      reg_reg_fn(e, i.src1, i.src2);
    }
  }
  template <typename REG_REG_FN, typename REG_CONST_FN>
  static void EmitAssociativeCompareOp(
      X64Emitter& e, const EmitArgType& i,
      const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) {
    if (i.src1.is_constant) {
      XEASSERT(!i.src2.is_constant);
      if (i.src1.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()), true);
      } else {
        auto temp = GetTempReg<decltype(i.src1)::reg_type>(e);
        e.mov(temp, i.src1.constant());
        reg_reg_fn(e, i.dest, i.src2, temp, true);
      }
    } else if (i.src2.is_constant) {
      if (i.src2.ConstantFitsIn32Reg()) {
        reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()), false);
      } else {
        auto temp = GetTempReg<decltype(i.src2)::reg_type>(e);
        e.mov(temp, i.src2.constant());
        reg_reg_fn(e, i.dest, i.src1, temp, false);
      }
    } else {
      reg_reg_fn(e, i.dest, i.src1, i.src2, false);
    }
  }
};

static const int ANY = -1;
typedef int tag_t;
static const tag_t TAG0 = 0;
static const tag_t TAG1 = 1;
static const tag_t TAG2 = 2;
static const tag_t TAG3 = 3;
static const tag_t TAG4 = 4;
static const tag_t TAG5 = 5;
static const tag_t TAG6 = 6;
static const tag_t TAG7 = 7;

typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**);

template <typename T>
void Register() {
  sequence_table.insert({ T::head_key, T::Select });
}
template <typename T, typename Tn, typename... Ts>
void Register() {
  Register<T>();
  Register<Tn, Ts...>();
}
#define EMITTER_OPCODE_TABLE(name, ...) \
    void Register_##name() { \
      Register<__VA_ARGS__>(); \
    }

#define MATCH(...) __VA_ARGS__
#define EMITTER(name, match) struct name : SingleSequence<name, match>
#define SEQUENCE(name, match) struct name : Sequence<name, match>

}  // namespace
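The EMITTER/MATCH/SEQUENCE macros are the public face of this machinery. A hypothetical single-instruction emitter for a 32-bit add, sketched in the style the macros expect (the real opcode tables live in the suppressed portion of this diff):

    EMITTER(ADD_I32, MATCH(I<OPCODE_ADD, I32<>, I32<>, I32<>>)) {
      static void Emit(X64Emitter& e, const EmitArgType& i) {
        // The helper handles all const/reg operand permutations shown above.
        EmitCommutativeBinaryOp(e, i,
            [](X64Emitter& e, const Reg32& dest, const Reg32& src) {
              e.add(dest, src);
            },
            [](X64Emitter& e, const Reg32& dest, int32_t constant) {
              e.add(dest, constant);
            });
      }
    };
    EMITTER_OPCODE_TABLE(OPCODE_ADD, ADD_I32);

Multi-instruction patterns use SEQUENCE with one I<...> matcher per instruction, and the TAG0..TAG7 operand tags (via TagTable::CheckTag) to require that two matchers see the same hir::Value.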
(File diff suppressed because it is too large.)
@@ -2,32 +2,32 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2013 Ben Vanik. All rights reserved.                             *
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#ifndef ALLOY_BACKEND_X64_X64_SEQUENCES_H_
#define ALLOY_BACKEND_X64_X64_SEQUENCES_H_

#include <alloy/core.h>
#include <alloy/hir/instr.h>

XEDECLARECLASS2(alloy, hir, Instr);

namespace alloy {
namespace backend {
namespace x64 {
namespace lowering {

class LoweringTable;

void RegisterSequences(LoweringTable* table);
class X64Emitter;


void RegisterSequences();
bool SelectSequence(X64Emitter& e, const hir::Instr* i, const hir::Instr** new_tail);


}  // namespace lowering
}  // namespace x64
}  // namespace backend
}  // namespace alloy


#endif  // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_
#endif  // ALLOY_BACKEND_X64_X64_SEQUENCES_H_
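The renamed header's two entry points replace the old LoweringTable plumbing. A plausible driver loop in the emitter (a sketch; the actual X64Emitter lowering code is not part of this excerpt):

    RegisterSequences();  // once, at backend init
    ...
    const hir::Instr* i = block->instr_head;
    while (i) {
      const hir::Instr* new_tail = i;
      if (!SelectSequence(e, i, &new_tail)) {
        // No emitter registered for this opcode/type combination.
        break;
      }
      i = new_tail;  // sequences may consume more than one instruction
    }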
@@ -0,0 +1,145 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <alloy/backend/x64/x64_thunk_emitter.h>

#include <third_party/xbyak/xbyak/xbyak.h>


using namespace alloy;
using namespace alloy::backend;
using namespace alloy::backend::x64;

using namespace Xbyak;


X64ThunkEmitter::X64ThunkEmitter(
    X64Backend* backend, XbyakAllocator* allocator) :
    X64Emitter(backend, allocator) {
}

X64ThunkEmitter::~X64ThunkEmitter() {
}

HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
  // rcx = target
  // rdx = arg0
  // r8 = arg1

  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
  // rsp + 0 = return address
  mov(qword[rsp + 8 * 3], r8);
  mov(qword[rsp + 8 * 2], rdx);
  mov(qword[rsp + 8 * 1], rcx);
  sub(rsp, stack_size);

  mov(qword[rsp + 48], rbx);
  mov(qword[rsp + 56], rcx);
  mov(qword[rsp + 64], rbp);
  mov(qword[rsp + 72], rsi);
  mov(qword[rsp + 80], rdi);
  mov(qword[rsp + 88], r12);
  mov(qword[rsp + 96], r13);
  mov(qword[rsp + 104], r14);
  mov(qword[rsp + 112], r15);

  /*movaps(ptr[rsp + 128], xmm6);
  movaps(ptr[rsp + 144], xmm7);
  movaps(ptr[rsp + 160], xmm8);
  movaps(ptr[rsp + 176], xmm9);
  movaps(ptr[rsp + 192], xmm10);
  movaps(ptr[rsp + 208], xmm11);
  movaps(ptr[rsp + 224], xmm12);
  movaps(ptr[rsp + 240], xmm13);
  movaps(ptr[rsp + 256], xmm14);
  movaps(ptr[rsp + 272], xmm15);*/

  mov(rax, rcx);
  mov(rcx, rdx);
  mov(rdx, r8);
  call(rax);

  /*movaps(xmm6, ptr[rsp + 128]);
  movaps(xmm7, ptr[rsp + 144]);
  movaps(xmm8, ptr[rsp + 160]);
  movaps(xmm9, ptr[rsp + 176]);
  movaps(xmm10, ptr[rsp + 192]);
  movaps(xmm11, ptr[rsp + 208]);
  movaps(xmm12, ptr[rsp + 224]);
  movaps(xmm13, ptr[rsp + 240]);
  movaps(xmm14, ptr[rsp + 256]);
  movaps(xmm15, ptr[rsp + 272]);*/

  mov(rbx, qword[rsp + 48]);
  mov(rcx, qword[rsp + 56]);
  mov(rbp, qword[rsp + 64]);
  mov(rsi, qword[rsp + 72]);
  mov(rdi, qword[rsp + 80]);
  mov(r12, qword[rsp + 88]);
  mov(r13, qword[rsp + 96]);
  mov(r14, qword[rsp + 104]);
  mov(r15, qword[rsp + 112]);

  add(rsp, stack_size);
  mov(rcx, qword[rsp + 8 * 1]);
  mov(rdx, qword[rsp + 8 * 2]);
  mov(r8, qword[rsp + 8 * 3]);
  ret();

  void* fn = Emplace(stack_size);
  return (HostToGuestThunk)fn;
}

GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
  // rcx = context
  // rdx = target function
  // r8 = arg0
  // r9 = arg1

  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
  // rsp + 0 = return address
  mov(qword[rsp + 8 * 2], rdx);
  mov(qword[rsp + 8 * 1], rcx);
  sub(rsp, stack_size);

  mov(qword[rsp + 48], rbx);
  mov(qword[rsp + 56], rcx);
  mov(qword[rsp + 64], rbp);
  mov(qword[rsp + 72], rsi);
  mov(qword[rsp + 80], rdi);
  mov(qword[rsp + 88], r12);
  mov(qword[rsp + 96], r13);
  mov(qword[rsp + 104], r14);
  mov(qword[rsp + 112], r15);

  // TODO(benvanik): save things? XMM0-5?

  mov(rax, rdx);
  mov(rdx, r8);
  mov(r8, r9);
  call(rax);

  mov(rbx, qword[rsp + 48]);
  mov(rcx, qword[rsp + 56]);
  mov(rbp, qword[rsp + 64]);
  mov(rsi, qword[rsp + 72]);
  mov(rdi, qword[rsp + 80]);
  mov(r12, qword[rsp + 88]);
  mov(r13, qword[rsp + 96]);
  mov(r14, qword[rsp + 104]);
  mov(r15, qword[rsp + 112]);

  add(rsp, stack_size);
  mov(rcx, qword[rsp + 8 * 1]);
  mov(rdx, qword[rsp + 8 * 2]);
  ret();

  void* fn = Emplace(stack_size);
  // Note: this is the guest-to-host thunk, so cast to the matching type.
  return (GuestToHostThunk)fn;
}
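Both thunks spill the host nonvolatile integer registers, shuffle the incoming Win64 arguments into place, and call the target. A sketch of how the backend consumes the emitted host-to-guest thunk (the typedef is assumed here to be uint64_t (*HostToGuestThunk)(void* target, void* arg0, void* arg1); it is not shown in this excerpt):

    // Matches the call in X64Function::CallImpl at the top of this diff:
    auto thunk = thunk_emitter.EmitHostToGuestThunk();
    thunk(machine_code_, thread_state->raw_context(), (void*)return_address);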
@@ -0,0 +1,147 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_X64_X64_THUNK_EMITTER_H_
#define XENIA_CPU_X64_X64_THUNK_EMITTER_H_

#include <alloy/core.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_emitter.h>


namespace alloy {
namespace backend {
namespace x64 {


/**
 * Stack Layout
 * ----------------------------
 * NOTE: stack must always be 16b aligned.
 *
 * Thunk stack:
 *  +------------------+
 *  | arg temp, 3 * 8  | rsp + 0
 *  |                  |
 *  |                  |
 *  +------------------+
 *  | scratch, 16b     | rsp + 32
 *  |                  |
 *  +------------------+
 *  | rbx              | rsp + 48
 *  +------------------+
 *  | rcx / context    | rsp + 56
 *  +------------------+
 *  | rbp              | rsp + 64
 *  +------------------+
 *  | rsi              | rsp + 72
 *  +------------------+
 *  | rdi              | rsp + 80
 *  +------------------+
 *  | r12              | rsp + 88
 *  +------------------+
 *  | r13              | rsp + 96
 *  +------------------+
 *  | r14              | rsp + 104
 *  +------------------+
 *  | r15              | rsp + 112
 *  +------------------+
 *  | (return address) | rsp + 120
 *  +------------------+
 *  | (rcx home)       | rsp + 128
 *  +------------------+
 *  | (rdx home)       | rsp + 136
 *  +------------------+
 *
 *
 * TODO:
 *  +------------------+
 *  | xmm6             | rsp + 128
 *  |                  |
 *  +------------------+
 *  | xmm7             | rsp + 144
 *  |                  |
 *  +------------------+
 *  | xmm8             | rsp + 160
 *  |                  |
 *  +------------------+
 *  | xmm9             | rsp + 176
 *  |                  |
 *  +------------------+
 *  | xmm10            | rsp + 192
 *  |                  |
 *  +------------------+
 *  | xmm11            | rsp + 208
 *  |                  |
 *  +------------------+
 *  | xmm12            | rsp + 224
 *  |                  |
 *  +------------------+
 *  | xmm13            | rsp + 240
 *  |                  |
 *  +------------------+
 *  | xmm14            | rsp + 256
 *  |                  |
 *  +------------------+
 *  | xmm15            | rsp + 272
 *  |                  |
 *  +------------------+
 *
 * Guest stack:
 *  +------------------+
 *  | arg temp, 3 * 8  | rsp + 0
 *  |                  |
 *  |                  |
 *  +------------------+
 *  | scratch, 32b     | rsp + 32
 *  |                  |
 *  +------------------+
 *  | rcx / context    | rsp + 64
 *  +------------------+
 *  | guest ret addr   | rsp + 72
 *  +------------------+
 *  | call ret addr    | rsp + 80
 *  +------------------+
 *  ... locals ...
 *  +------------------+
 *  | (return address) |
 *  +------------------+
 *
 */

class StackLayout {
public:
  const static size_t THUNK_STACK_SIZE = 120;

  const static size_t GUEST_STACK_SIZE = 88;
  const static size_t GUEST_RCX_HOME = 64;
  const static size_t GUEST_RET_ADDR = 72;
  const static size_t GUEST_CALL_RET_ADDR = 80;
};


class X64ThunkEmitter : public X64Emitter {
public:
  X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator);
  virtual ~X64ThunkEmitter();

  // Call a generated function, saving all stack parameters.
  HostToGuestThunk EmitHostToGuestThunk();

  // Function that guest code can call to transition into host code.
  GuestToHostThunk EmitGuestToHostThunk();
};


}  // namespace x64
}  // namespace backend
}  // namespace alloy


#endif  // XENIA_CPU_X64_X64_THUNK_EMITTER_H_
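The constants fall out of the diagrams: the thunk's spill area ends at rsp + 120 (hence THUNK_STACK_SIZE = 120) and the guest frame's tracked slots end at rsp + 88. A quick alignment check of the thunk frame, per the 16-byte NOTE above:

    // On entry the CALL has pushed 8 bytes, so rsp % 16 == 8.
    // sub(rsp, THUNK_STACK_SIZE) then puts 8 + 120 = 128 bytes below the
    // caller's aligned rsp, i.e. rsp % 16 == 0 again for the nested call().
    static_assert(StackLayout::THUNK_STACK_SIZE % 16 == 8,
                  "thunk frame must restore 16-byte alignment");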
@@ -0,0 +1,200 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <alloy/backend/x64/x64_tracers.h>

#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/thread_state.h>

using namespace alloy;
using namespace alloy::backend::x64;
using namespace alloy::runtime;

namespace alloy {
namespace backend {
namespace x64 {

#define ITRACE 0
#define DTRACE 0

#define TARGET_THREAD 1

#define IFLUSH() fflush(stdout)
#define IPRINT if (thread_state->thread_id() == TARGET_THREAD) printf
#define DFLUSH() fflush(stdout)
#define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf
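ITRACE/DTRACE gate instruction and data tracing, and TARGET_THREAD narrows the output to a single guest thread. Note that DPRINT expands to two statements, so it is only safe where a full statement is expected; with TARGET_THREAD = 1, a call such as DPRINT("x = %d\n", x) becomes, roughly:

    fflush(stdout);
    if (thread_state->thread_id() == 1) printf("x = %d\n", x);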
uint32_t GetTracingMode() {
  uint32_t mode = 0;
#if ITRACE
  mode |= TRACING_INSTR;
#endif  // ITRACE
#if DTRACE
  mode |= TRACING_DATA;
#endif  // DTRACE
  return mode;
}

void TraceString(void* raw_context, const char* str) {
  auto thread_state = *((ThreadState**)raw_context);
  IPRINT("XE[t] :%d: %s\n", thread_state->thread_id(), str);
  IFLUSH();
}

void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = ctx i8 +%d\n", (int8_t)value, value, offset);
}
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = ctx i16 +%d\n", (int16_t)value, value, offset);
}
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = ctx i32 +%d\n", (int32_t)value, value, offset);
}
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%lld (%llX) = ctx i64 +%d\n", (int64_t)value, value, offset);
}
void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%e (%X) = ctx f32 +%d\n", value.m128_f32[0], value.m128_i32[0], offset);
}
void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  union {
    double d;
    uint64_t x;
  } f;
  f.x = value.m128_i64[0];
  DPRINT("%lle (%llX) = ctx f64 +%d\n", f.d, value.m128_i64[0], offset);
}
void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = ctx v128 +%d\n",
         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
         offset);
}

void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("ctx i8 +%d = %d (%X)\n", offset, (int8_t)value, value);
}
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("ctx i16 +%d = %d (%X)\n", offset, (int16_t)value, value);
}
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("ctx i32 +%d = %d (%X)\n", offset, (int32_t)value, value);
}
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("ctx i64 +%d = %lld (%llX)\n", offset, (int64_t)value, value);
}
void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
DPRINT("ctx f32 +%d = %e (%X)\n", offset, value.m128_i32[0], value.m128_f32[0]);
|
||||
}
void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  union {
    double d;
    uint64_t x;
  } f;
  f.x = value.m128_i64[0];
DPRINT("ctx f64 +%d = %lle (%llX)\n", offset, value.m128_i64[0], f.d);
|
||||
}
void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", offset,
         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
}

void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = load.i8 %.8X\n", (int8_t)value, value, address);
}
void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = load.i16 %.8X\n", (int16_t)value, value, address);
}
void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%d (%X) = load.i32 %.8X\n", (int32_t)value, value, address);
}
void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%lld (%llX) = load.i64 %.8X\n", (int64_t)value, value, address);
}
void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("%e (%X) = load.f32 %.8X\n", value.m128_f32[0], value.m128_i32[0], address);
}
void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  union {
    double d;
    uint64_t x;
  } f;
  f.x = value.m128_i64[0];
  DPRINT("%lle (%llX) = load.f64 %.8X\n", f.d, value.m128_i64[0], address);
}
void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
         address);
}

void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.i8 %.8X = %d (%X)\n", address, (int8_t)value, value);
}
void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.i16 %.8X = %d (%X)\n", address, (int16_t)value, value);
}
void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.i32 %.8X = %d (%X)\n", address, (int32_t)value, value);
}
void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.i64 %.8X = %lld (%llX)\n", address, (int64_t)value, value);
}
void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.f32 %.8X = %e (%X)\n", address, value.m128_f32[0], value.m128_i32[0]);
}
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  union {
    double d;
    uint64_t x;
  } f;
  f.x = value.m128_i64[0];
  DPRINT("store.f64 %.8X = %lle (%llX)\n", address, f.d, value.m128_i64[0]);
}
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
  auto thread_state = *((ThreadState**)raw_context);
  DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", address,
         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
}


}  // namespace x64
}  // namespace backend
}  // namespace alloy
@@ -0,0 +1,85 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef ALLOY_BACKEND_X64_X64_TRACERS_H_
#define ALLOY_BACKEND_X64_X64_TRACERS_H_

#include <alloy/core.h>

#if XE_LIKE_WIN32
#include <xmmintrin.h>
#else
// This branch targets non-MSVC compilers, so the GCC-style alignment
// attribute is used rather than the MSVC-only __declspec(align(16)).
typedef union __attribute__((aligned(16))) __m128 {
  float m128_f32[4];
  uint64_t m128_u64[2];
  int8_t m128_i8[16];
  int16_t m128_i16[8];
  int32_t m128_i32[4];
  int64_t m128_i64[2];
  uint8_t m128_u8[16];
  uint16_t m128_u16[8];
  uint32_t m128_u32[4];
} __m128;
#endif


namespace alloy {
namespace backend {
namespace x64 {
class X64Emitter;

enum TracingMode {
  TRACING_INSTR = (1 << 1),
  TRACING_DATA = (1 << 2),
};

uint32_t GetTracingMode();
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }

void TraceString(void* raw_context, const char* str);

void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value);
void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value);
void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value);

void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value);
void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value);
void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value);

void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value);
void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value);
void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value);
void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value);
void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value);

void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value);
void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value);
void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value);
void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value);
void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);

}  // namespace x64
}  // namespace backend
}  // namespace alloy


#endif  // ALLOY_BACKEND_X64_X64_TRACERS_H_
@@ -20,6 +20,8 @@ using namespace alloy::runtime;

Compiler::Compiler(Runtime* runtime) :
    runtime_(runtime) {
  scratch_arena_ = new Arena();

  alloy::tracing::WriteEvent(EventType::Init({
  }));
}

@@ -32,6 +34,8 @@ Compiler::~Compiler() {
    delete pass;
  }

  delete scratch_arena_;

  alloy::tracing::WriteEvent(EventType::Deinit({
  }));
}

@@ -45,10 +49,13 @@ void Compiler::Reset() {
}

int Compiler::Compile(HIRBuilder* builder) {
  SCOPE_profile_cpu_f("alloy");

  // TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they
  // stop changing things, etc.
  for (auto it = passes_.begin(); it != passes_.end(); ++it) {
    CompilerPass* pass = *it;
    scratch_arena_->Reset();
    if (pass->Run(builder)) {
      return 1;
    }
@@ -28,6 +28,7 @@ public:
  ~Compiler();

  runtime::Runtime* runtime() const { return runtime_; }
  Arena* scratch_arena() const { return scratch_arena_; }

  void AddPass(CompilerPass* pass);

@@ -37,6 +38,7 @@ public:

private:
  runtime::Runtime* runtime_;
  Arena* scratch_arena_;

  typedef std::vector<CompilerPass*> PassList;
  PassList passes_;
@@ -27,3 +27,7 @@ int CompilerPass::Initialize(Compiler* compiler) {
  compiler_ = compiler;
  return 0;
}

Arena* CompilerPass::scratch_arena() const {
  return compiler_->scratch_arena();
}
@@ -32,6 +32,9 @@ public:

  virtual int Run(hir::HIRBuilder* builder) = 0;

protected:
  Arena* scratch_arena() const;

protected:
  runtime::Runtime* runtime_;
  Compiler* compiler_;
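With scratch_arena() now exposed on CompilerPass, a minimal pass under this interface looks like the following (a hypothetical skeleton, not a pass from this commit):

    class ExamplePass : public CompilerPass {
    public:
      virtual int Run(hir::HIRBuilder* builder) {
        // Per-run temporary storage; Compiler::Compile resets it before each
        // pass, so nothing allocated here may outlive Run().
        Arena* scratch = scratch_arena();
        // ... walk builder->first_block() and rewrite instructions ...
        return 0;  // nonzero aborts compilation (see Compiler::Compile)
      }
    };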
@@ -11,11 +11,15 @@
#define ALLOY_COMPILER_COMPILER_PASSES_H_

#include <alloy/compiler/passes/constant_propagation_pass.h>
#include <alloy/compiler/passes/control_flow_analysis_pass.h>
#include <alloy/compiler/passes/context_promotion_pass.h>
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
#include <alloy/compiler/passes/dead_code_elimination_pass.h>
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
#include <alloy/compiler/passes/finalization_pass.h>
//#include <alloy/compiler/passes/dead_store_elimination_pass.h>
#include <alloy/compiler/passes/register_allocation_pass.h>
#include <alloy/compiler/passes/simplification_pass.h>
#include <alloy/compiler/passes/validation_pass.h>
#include <alloy/compiler/passes/value_reduction_pass.h>

// TODO:

@@ -134,5 +138,42 @@
//     store_context +302, v5
//     branch_true v5, ...
//
// - X86Canonicalization
//   For various opcodes add copies/commute the arguments to match x86
//   operand semantics. This makes code generation easier and if done
//   before register allocation can prevent a lot of extra shuffling in
//   the emitted code.
//
//   Example:
//   <block0>:
//     v0 = ...
//     v1 = ...
//     v2 = add v0, v1     <-- v1 now unused
//   Becomes:
//     v0 = ...
//     v1 = ...
//     v1 = add v1, v0     <-- src1 = dest/src, so reuse for both
//                             by commuting and setting dest = src1
//
// - RegisterAllocation
//   Given a machine description (register classes, counts) run over values
//   and assign them to registers, adding spills as needed. It should be
//   possible to directly emit code from this form.
//
//   Example:
//   <block0>:
//     v0 = load_context +0
//     v1 = load_context +1
//     v0 = add v0, v1
//     ...
//     v2 = mul v0, v1
//   Becomes:
//     reg0 = load_context +0
//     reg1 = load_context +1
//     reg2 = add reg0, reg1
//     store_local +123, reg2  <-- spill inserted
//     ...
//     reg0 = load_local +123  <-- load inserted
//     reg0 = mul reg0, reg1

#endif  // ALLOY_COMPILER_COMPILER_PASSES_H_
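The passes collected by this header are assembled into a pipeline through Compiler::AddPass, shown earlier in this diff. A plausible frontend setup, using only the default-constructed passes that appear in this commit:

    // Order matters: analysis first, then promotion exposes constants
    // for propagation.
    compiler->AddPass(new passes::ControlFlowAnalysisPass());
    compiler->AddPass(new passes::ContextPromotionPass());
    compiler->AddPass(new passes::ConstantPropagationPass());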
@ -9,6 +9,9 @@
|
|||
|
||||
#include <alloy/compiler/passes/constant_propagation_pass.h>
|
||||
|
||||
#include <alloy/runtime/function.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
|
@ -23,6 +26,8 @@ ConstantPropagationPass::~ConstantPropagationPass() {
|
|||
}
|
||||
|
||||
int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Once ContextPromotion has run there will likely be a whole slew of
|
||||
// constants that can be pushed through the function.
|
||||
// Example:
|
||||
|
@ -41,6 +46,14 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
// v1 = add 1000, 1000
|
||||
// store_context +200, 2000
|
||||
// A DCE run after this should clean up any of the values no longer needed.
|
||||
//
|
||||
// Special care needs to be taken with paired instructions. For example,
|
||||
// DID_CARRY needs to be set as a constant:
|
||||
// v1 = sub.2 20, 1
|
||||
// v2 = did_carry v1
|
||||
// should become:
|
||||
// v1 = 19
|
||||
// v2 = 0
|
||||
|
||||
Block* block = builder->first_block();
|
||||
while (block) {
|
||||
|
@ -79,6 +92,17 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case OPCODE_CALL_INDIRECT:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
runtime::FunctionInfo* symbol_info;
|
||||
if (runtime_->LookupFunctionInfo(
|
||||
(uint32_t)i->src1.value->constant.i32, &symbol_info)) {
|
||||
break;
|
||||
}
|
||||
i->Replace(&OPCODE_CALL_info, i->flags);
|
||||
i->src1.symbol_info = symbol_info;
|
||||
}
|
||||
break;
|
||||
case OPCODE_CALL_INDIRECT_TRUE:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
if (i->src1.value->IsConstantTrue()) {
|
||||
|
@ -179,20 +203,112 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
break;
|
||||
|
||||
// TODO(benvanik): compares
|
||||
case OPCODE_COMPARE_EQ:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantEQ(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_NE:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantNE(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_SLT:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantSLT(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_SLE:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantSLE(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_SGT:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantSGT(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_SGE:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantSGE(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_ULT:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantULT(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_ULE:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantULE(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_UGT:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantUGT(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_COMPARE_UGE:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
bool value = i->src1.value->IsConstantUGE(i->src2.value);
|
||||
i->dest->set_constant(value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_DID_CARRY:
|
||||
XEASSERT(!i->src1.value->IsConstant());
|
||||
break;
|
||||
case OPCODE_DID_OVERFLOW:
|
||||
XEASSERT(!i->src1.value->IsConstant());
|
||||
break;
|
||||
case OPCODE_DID_SATURATE:
|
||||
XEASSERT(!i->src1.value->IsConstant());
|
||||
break;
|
||||
|
||||
case OPCODE_ADD:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Add(i->src2.value);
|
||||
bool did_carry = v->Add(i->src2.value);
|
||||
bool propagate_carry = !!(i->flags & ARITHMETIC_SET_CARRY);
|
||||
i->Remove();
|
||||
|
||||
// If carry is set find the DID_CARRY and fix it.
|
||||
if (propagate_carry) {
|
||||
PropagateCarry(v, did_carry);
|
||||
}
|
||||
}
|
||||
break;
|
||||
// TODO(benvanik): ADD_CARRY
|
||||
// TODO(benvanik): ADD_CARRY (w/ ARITHMETIC_SET_CARRY)
|
||||
case OPCODE_SUB:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Sub(i->src2.value);
|
||||
bool did_carry = v->Sub(i->src2.value);
|
||||
bool propagate_carry = !!(i->flags & ARITHMETIC_SET_CARRY);
|
||||
i->Remove();
|
||||
|
||||
// If carry is set find the DID_CARRY and fix it.
|
||||
if (propagate_carry) {
|
||||
PropagateCarry(v, did_carry);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OPCODE_MUL:
|
||||
|
@ -298,6 +414,13 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
i->Remove();
|
||||
}
|
||||
break;
|
||||
case OPCODE_CNTLZ:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
v->set_zero(v->type);
|
||||
v->CountLeadingZeros(i->src1.value);
|
||||
i->Remove();
|
||||
}
|
||||
break;
|
||||
// TODO(benvanik): INSERT/EXTRACT
|
||||
// TODO(benvanik): SPLAT/PERMUTE/SWIZZLE
|
||||
case OPCODE_SPLAT:
|
||||
|
@@ -314,3 +437,16 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {

  return 0;
}

void ConstantPropagationPass::PropagateCarry(hir::Value* v, bool did_carry) {
  auto next = v->use_head;
  while (next) {
    auto use = next;
    next = use->next;
    if (use->instr->opcode == &OPCODE_DID_CARRY_info) {
      // Replace carry value.
      use->instr->dest->set_constant(did_carry ? 1 : 0);
      use->instr->Remove();
    }
  }
}
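A worked illustration may help here: when the pass folds a constant ADD it also has to answer any DID_CARRY query on the folded value. The sketch below reproduces just that carry computation with hypothetical stand-in types (not alloy's real Value/Instr classes), to show what constant a rewritten DID_CARRY consumer ends up holding.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for what a folded ADD produces.
struct FoldResult {
  uint64_t value;
  bool carry;  // unsigned overflow out of the addition
};

// Fold a 64-bit ADD the way a constant-propagation pass would, reporting
// whether the hardware carry flag would have been set.
FoldResult FoldAdd64(uint64_t lhs, uint64_t rhs) {
  uint64_t sum = lhs + rhs;
  return { sum, sum < lhs };  // wrapped around -> carry out
}

int main() {
  // ADD 0xFFFFFFFFFFFFFFFF + 1 wraps to 0 and sets carry; a DID_CARRY
  // consumer of the result would be replaced with the constant 1.
  FoldResult r = FoldAdd64(~0ull, 1);
  assert(r.value == 0 && r.carry);
  return 0;
}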
@@ -26,6 +26,7 @@ public:
  virtual int Run(hir::HIRBuilder* builder);

 private:
+  void PropagateCarry(hir::Value* v, bool did_carry);
};
@@ -9,6 +9,8 @@

#include <alloy/compiler/passes/context_promotion_pass.h>

+#include <gflags/gflags.h>
+
#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>

@@ -20,6 +22,10 @@ using namespace alloy::hir;
using namespace alloy::runtime;


+DEFINE_bool(store_all_context_values, false,
+    "Don't strip dead context stores to aid in debugging.");


ContextPromotionPass::ContextPromotionPass() :
    context_values_size_(0), context_values_(0),
    CompilerPass() {

@@ -45,6 +51,8 @@ int ContextPromotionPass::Initialize(Compiler* compiler) {
}

int ContextPromotionPass::Run(HIRBuilder* builder) {
+  SCOPE_profile_cpu_f("alloy");
+
  // Like mem2reg, but because context memory is unaliasable it's easier to
  // check and convert LoadContext/StoreContext into value operations.
  // Example of load->value promotion:
@@ -69,10 +77,12 @@ int ContextPromotionPass::Run(HIRBuilder* builder) {
  }

  // Remove all dead stores.
-  block = builder->first_block();
-  while (block) {
-    RemoveDeadStoresBlock(block);
-    block = block->next;
-  }
+  if (!FLAGS_store_all_context_values) {
+    block = builder->first_block();
+    while (block) {
+      RemoveDeadStoresBlock(block);
+      block = block->next;
+    }
+  }

  return 0;
@ -0,0 +1,69 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/compiler/passes/control_flow_analysis_pass.h>
|
||||
|
||||
#include <alloy/backend/backend.h>
|
||||
#include <alloy/compiler/compiler.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
using namespace alloy::frontend;
|
||||
using namespace alloy::hir;
|
||||
using namespace alloy::runtime;
|
||||
|
||||
|
||||
ControlFlowAnalysisPass::ControlFlowAnalysisPass() :
|
||||
CompilerPass() {
|
||||
}
|
||||
|
||||
ControlFlowAnalysisPass::~ControlFlowAnalysisPass() {
|
||||
}
|
||||
|
||||
int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// TODO(benvanik): reset edges for all blocks? Needed to be re-runnable.
|
||||
|
||||
// Add edges.
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
auto instr = block->instr_tail;
|
||||
while (instr) {
|
||||
if ((instr->opcode->flags & OPCODE_FLAG_BRANCH) == 0) {
|
||||
break;
|
||||
}
|
||||
if (instr->opcode == &OPCODE_BRANCH_info) {
|
||||
auto label = instr->src1.label;
|
||||
builder->AddEdge(block, label->block, Edge::UNCONDITIONAL);
|
||||
} else if (instr->opcode == &OPCODE_BRANCH_TRUE_info ||
|
||||
instr->opcode == &OPCODE_BRANCH_FALSE_info) {
|
||||
auto label = instr->src2.label;
|
||||
builder->AddEdge(block, label->block, 0);
|
||||
}
|
||||
instr = instr->prev;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
// Mark dominators.
|
||||
block = builder->first_block();
|
||||
while (block) {
|
||||
if (block->incoming_edge_head &&
|
||||
!block->incoming_edge_head->incoming_next) {
|
||||
block->incoming_edge_head->flags |= Edge::DOMINATES;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
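The dominator marking here is deliberately shallow: a block whose incoming-edge list has exactly one entry is trivially dominated by that edge, and blocks with multiple predecessors are left unmarked rather than running a full dominator-tree algorithm. A minimal sketch of that single-predecessor check over a toy CFG (the types are hypothetical stand-ins, not alloy's):

#include <cstdio>
#include <vector>

// Toy CFG node: just a list of predecessor indices.
struct ToyBlock {
  std::vector<int> preds;
};

int main() {
  // 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3  (a classic diamond)
  std::vector<ToyBlock> blocks(4);
  blocks[1].preds = {0};
  blocks[2].preds = {0};
  blocks[3].preds = {1, 2};

  for (int i = 1; i < (int)blocks.size(); ++i) {
    if (blocks[i].preds.size() == 1) {
      // Exactly one incoming edge: that edge dominates the block.
      printf("block %d is dominated via its edge from block %d\n",
             i, blocks[i].preds[0]);
    }
    // blocks[3] has two predecessors; a real dominator analysis would be
    // needed to learn that block 0 still dominates it.
  }
  return 0;
}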
@ -0,0 +1,37 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
||||
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
|
||||
class ControlFlowAnalysisPass : public CompilerPass {
|
||||
public:
|
||||
ControlFlowAnalysisPass();
|
||||
virtual ~ControlFlowAnalysisPass();
|
||||
|
||||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_COMPILER_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
|
@ -0,0 +1,203 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/compiler/passes/data_flow_analysis_pass.h>
|
||||
|
||||
#include <alloy/backend/backend.h>
|
||||
#include <alloy/compiler/compiler.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4244)
|
||||
#pragma warning(disable : 4267)
|
||||
#include <llvm/ADT/BitVector.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
using namespace alloy::frontend;
|
||||
using namespace alloy::hir;
|
||||
using namespace alloy::runtime;
|
||||
|
||||
|
||||
DataFlowAnalysisPass::DataFlowAnalysisPass() :
|
||||
CompilerPass() {
|
||||
}
|
||||
|
||||
DataFlowAnalysisPass::~DataFlowAnalysisPass() {
|
||||
}
|
||||
|
||||
int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Linearize blocks so that we can detect cycles and propagate dependencies.
|
||||
uint32_t block_count = LinearizeBlocks(builder);
|
||||
|
||||
// Analyze value flow and add locals as needed.
|
||||
AnalyzeFlow(builder, block_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
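Conceptually, the AnalyzeFlow routine that follows is a backward liveness computation over the linearized block list: values used in a block but defined elsewhere become "incoming", and each successor's incoming set is unioned into the current block's outgoing set so pass-through values are tracked too. A self-contained sketch of that bitset dataflow, with toy types standing in for alloy's HIR:

#include <bitset>
#include <cstdio>
#include <vector>

// Toy block: which value ordinals it defines and which it uses.
struct ToyBlock {
  std::bitset<64> defs;
  std::bitset<64> uses;
  std::vector<int> succs;  // successor block indices (forward edges only)
};

int main() {
  // v0 defined in block 0 and used in block 2; v1 local to block 1.
  std::vector<ToyBlock> blocks(3);
  blocks[0].defs.set(0); blocks[0].succs = {1};
  blocks[1].defs.set(1); blocks[1].uses.set(1); blocks[1].succs = {2};
  blocks[2].uses.set(0);

  std::vector<std::bitset<64>> incoming(blocks.size());
  // Walk blocks in reverse, like the pass does.
  for (int b = (int)blocks.size() - 1; b >= 0; --b) {
    // A value is incoming if used here but defined in another block...
    incoming[b] = blocks[b].uses & ~blocks[b].defs;
    // ...or needed by a later block and merely passing through this one.
    for (int s : blocks[b].succs) {
      incoming[b] |= incoming[s] & ~blocks[b].defs;
    }
  }

  // v0 is incoming to block 1 (pass-through) and block 2 (real use).
  printf("block1 incoming v0: %d\n", (int)incoming[1].test(0));  // 1
  printf("block2 incoming v0: %d\n", (int)incoming[2].test(0));  // 1
  return 0;
}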
uint32_t DataFlowAnalysisPass::LinearizeBlocks(HIRBuilder* builder) {
  // TODO(benvanik): actually do this - we cheat now knowing that they are in
  //     sequential order.
  uint32_t block_ordinal = 0;
  auto block = builder->first_block();
  while (block) {
    block->ordinal = block_ordinal++;
    block = block->next;
  }
  return block_ordinal;
}

void DataFlowAnalysisPass::AnalyzeFlow(HIRBuilder* builder,
                                       uint32_t block_count) {
  uint32_t max_value_estimate =
      builder->max_value_ordinal() + 1 + block_count * 4;

  // Stash for value map. We may want to maintain this during building.
  auto arena = builder->arena();
  Value** value_map = (Value**)arena->Alloc(
      sizeof(Value*) * max_value_estimate);

  // Allocate incoming bitvectors for use by blocks. We don't need outgoing
  // because they are only used during the block iteration.
  // Mapped by block ordinal.
  // TODO(benvanik): cache this list, grow as needed, etc.
  auto incoming_bitvectors = (llvm::BitVector**)arena->Alloc(
      sizeof(llvm::BitVector*) * block_count);
  for (auto n = 0u; n < block_count; n++) {
    incoming_bitvectors[n] = new llvm::BitVector(max_value_estimate);
  }

  // Walk blocks in reverse and calculate incoming/outgoing values.
  auto block = builder->last_block();
  while (block) {
    // Allocate bitsets based on max value number.
    block->incoming_values = incoming_bitvectors[block->ordinal];
    auto& incoming_values = *block->incoming_values;

    // Walk instructions and gather up incoming values.
    auto instr = block->instr_head;
    while (instr) {
      uint32_t signature = instr->opcode->signature;
#define SET_INCOMING_VALUE(v) \
  if (v->def && v->def->block != block) { \
    incoming_values.set(v->ordinal); \
  } \
  XEASSERT(v->ordinal < max_value_estimate); \
  value_map[v->ordinal] = v;
      if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
        SET_INCOMING_VALUE(instr->src1.value);
      }
      if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
        SET_INCOMING_VALUE(instr->src2.value);
      }
      if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
        SET_INCOMING_VALUE(instr->src3.value);
      }
#undef SET_INCOMING_VALUE
      instr = instr->next;
    }

    // Add all successor incoming values to our outgoing, as we need to
    // pass them through.
    llvm::BitVector outgoing_values(max_value_estimate);
    auto outgoing_edge = block->outgoing_edge_head;
    while (outgoing_edge) {
      if (outgoing_edge->dest->ordinal > block->ordinal) {
        outgoing_values |= *outgoing_edge->dest->incoming_values;
      }
      outgoing_edge = outgoing_edge->outgoing_next;
    }
    incoming_values |= outgoing_values;

    // Add stores for all outgoing values.
    auto outgoing_ordinal = outgoing_values.find_first();
    while (outgoing_ordinal != -1) {
      Value* src_value = value_map[outgoing_ordinal];
      XEASSERTNOTNULL(src_value);
      if (!src_value->local_slot) {
        src_value->local_slot = builder->AllocLocal(src_value->type);
      }
      builder->StoreLocal(src_value->local_slot, src_value);

      // If we are in the block the value was defined in:
      if (src_value->def->block == block) {
        // Move the store to right after the def, or as soon after
        // as we can (respecting PAIRED flags).
        auto def_next = src_value->def->next;
        while (def_next && def_next->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
          def_next = def_next->next;
        }
        XEASSERTNOTNULL(def_next);
        builder->last_instr()->MoveBefore(def_next);

        // We don't need it in the incoming list.
        incoming_values.reset(outgoing_ordinal);
      } else {
        // Eh, just throw at the end, before the first branch.
        auto tail = block->instr_tail;
        while (tail && tail->opcode->flags & OPCODE_FLAG_BRANCH) {
          tail = tail->prev;
        }
        XEASSERTNOTZERO(tail);
        builder->last_instr()->MoveBefore(tail->next);
      }

      outgoing_ordinal = outgoing_values.find_next(outgoing_ordinal);
    }

    // Add loads for all incoming values and rename them in the block.
    auto incoming_ordinal = incoming_values.find_first();
    while (incoming_ordinal != -1) {
      Value* src_value = value_map[incoming_ordinal];
      XEASSERTNOTNULL(src_value);
      if (!src_value->local_slot) {
        src_value->local_slot = builder->AllocLocal(src_value->type);
      }
      Value* local_value = builder->LoadLocal(src_value->local_slot);
      builder->last_instr()->MoveBefore(block->instr_head);

      // Swap uses of original value with the local value.
      auto instr = block->instr_head;
      while (instr) {
        uint32_t signature = instr->opcode->signature;
        if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
          if (instr->src1.value == src_value) {
            instr->set_src1(local_value);
          }
        }
        if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
          if (instr->src2.value == src_value) {
            instr->set_src2(local_value);
          }
        }
        if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
          if (instr->src3.value == src_value) {
            instr->set_src3(local_value);
          }
        }
        instr = instr->next;
      }

      incoming_ordinal = incoming_values.find_next(incoming_ordinal);
    }

    block = block->prev;
  }

  // Cleanup bitvectors.
  for (auto n = 0u; n < block_count; n++) {
    delete incoming_bitvectors[n];
  }
}
@ -0,0 +1,39 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_
|
||||
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
|
||||
class DataFlowAnalysisPass : public CompilerPass {
|
||||
public:
|
||||
DataFlowAnalysisPass();
|
||||
virtual ~DataFlowAnalysisPass();
|
||||
|
||||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
uint32_t LinearizeBlocks(hir::HIRBuilder* builder);
|
||||
void AnalyzeFlow(hir::HIRBuilder* builder, uint32_t block_count);
|
||||
};
|
||||
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_COMPILER_PASSES_DATA_FLOW_ANALYSIS_PASS_H_
|
|
@@ -23,6 +23,8 @@ DeadCodeEliminationPass::~DeadCodeEliminationPass() {
}

int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
+  SCOPE_profile_cpu_f("alloy");
+
  // ContextPromotion/DSE will likely leave around a lot of dead statements.
  // Code generated for comparison/testing produces many unused statements and
  // with proper use analysis it should be possible to remove most of them:
@@ -59,20 +61,21 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
  //     all removed ops with NOP and then do a single pass that removes them
  //     all.

-  bool any_removed = false;
+  bool any_instr_removed = false;
+  bool any_locals_removed = false;
  Block* block = builder->first_block();
  while (block) {
    // Walk instructions in reverse.
    Instr* i = block->instr_tail;
    while (i) {
-      Instr* prev = i->prev;
+      auto prev = i->prev;

-      const OpcodeInfo* opcode = i->opcode;
-      uint32_t signature = opcode->signature;
+      auto opcode = i->opcode;
      if (!(opcode->flags & OPCODE_FLAG_VOLATILE) &&
          i->dest && !i->dest->use_head) {
        // Has no uses and is not volatile. This instruction can die!
        MakeNopRecursive(i);
-        any_removed = true;
+        any_instr_removed = true;
      } else if (opcode == &OPCODE_ASSIGN_info) {
        // Assignment. These are useless, so just try to remove by completely
        // replacing the value.
@@ -82,11 +85,31 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
      i = prev;
    }

+    // Walk instructions forward.
+    i = block->instr_head;
+    while (i) {
+      auto next = i->next;
+
+      auto opcode = i->opcode;
+      if (opcode == &OPCODE_STORE_LOCAL_info) {
+        // Check to see if the store has any intervening uses after the load.
+        // If not, it can be removed (as the local is just passing through the
+        // function).
+        // We do this after the previous pass so that removed code doesn't keep
+        // the local alive.
+        if (!CheckLocalUse(i)) {
+          any_locals_removed = true;
+        }
+      }
+
+      i = next;
+    }
+
    block = block->next;
  }

  // Remove all nops.
-  if (any_removed) {
+  if (any_instr_removed) {
    Block* block = builder->first_block();
    while (block) {
      Instr* i = block->instr_head;
@@ -102,6 +125,21 @@ int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
    }
  }

+  // Remove any locals that no longer have uses.
+  if (any_locals_removed) {
+    // TODO(benvanik): local removal/dealloc.
+    auto locals = builder->locals();
+    for (auto it = locals.begin(); it != locals.end();) {
+      auto next = ++it;
+      auto value = *it;
+      if (!value->use_head) {
+        // Unused, can be removed.
+        locals.erase(it);
+      }
+      it = next;
+    }
+  }
+
  return 0;
}
@@ -150,3 +188,24 @@ void DeadCodeEliminationPass::ReplaceAssignment(Instr* i) {

  i->Remove();
}

bool DeadCodeEliminationPass::CheckLocalUse(Instr* i) {
  auto slot = i->src1.value;
  auto src = i->src2.value;

  auto use = src->use_head;
  if (use) {
    auto use_instr = use->instr;
    if (use_instr->opcode != &OPCODE_LOAD_LOCAL_info) {
      // A valid use (probably). Keep it.
      return true;
    }

    // Load/store are paired. They can both be removed.
    use_instr->Remove();
  }

  i->Remove();

  return false;
}
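CheckLocalUse, in other words, removes a StoreLocal whose only consumer is the paired LoadLocal that reads the slot straight back, i.e. a value merely tunneling through a stack slot. A standalone sketch of the same pairing test on a toy use list follows; the types are hypothetical, and it scans all uses where the real pass only needs to inspect the head of the sorted use list:

#include <cassert>
#include <vector>

// Hypothetical miniature of the HIR shapes involved.
enum class Op { kStoreLocal, kLoadLocal, kAdd };

struct ToyUse { Op consumer; };

// Returns true if the stored value has a "real" use, i.e. anything other
// than the paired load that immediately reads the slot back.
bool HasRealUse(const std::vector<ToyUse>& uses) {
  for (const ToyUse& use : uses) {
    if (use.consumer != Op::kLoadLocal) {
      return true;  // keep the store: someone actually consumes the value
    }
  }
  return false;  // only the paired load: store and load can both be removed
}

int main() {
  assert(!HasRealUse({{Op::kLoadLocal}}));             // removable pair
  assert(HasRealUse({{Op::kLoadLocal}, {Op::kAdd}}));  // genuine consumer
  return 0;
}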
@@ -28,6 +28,7 @@ public:
 private:
  void MakeNopRecursive(hir::Instr* i);
  void ReplaceAssignment(hir::Instr* i);
+  bool CheckLocalUse(hir::Instr* i);
};
@@ -30,6 +30,8 @@ FinalizationPass::~FinalizationPass() {
}

int FinalizationPass::Run(HIRBuilder* builder) {
+  SCOPE_profile_cpu_f("alloy");
+
  // Process the HIR and prepare it for lowering.
  // After this is done the HIR should be ready for emitting.
@@ -44,9 +46,9 @@ int FinalizationPass::Run(HIRBuilder* builder) {
    auto label = block->label_head;
    while (label) {
      if (!label->name) {
-        char* name = (char*)arena->Alloc(6 + 4 + 1);
-        xestrcpya(name, 6 + 1, "_label");
-        char* part = _itoa(label->id, name + 6, 10);
+        const size_t label_len = 6 + 4 + 1;
+        char* name = (char*)arena->Alloc(label_len);
+        xesnprintfa(name, label_len, "_label%d", label->id);
        label->name = name;
      }
      label = label->next;
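The change above swaps a strcpy/_itoa pair for a single bounded snprintf-style call; the 6 + 4 + 1 budget is "_label" plus up to four decimal digits plus the NUL. A quick standalone check of that sizing, assuming xesnprintfa behaves like standard snprintf:

#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
  // "_label" (6 chars) + up to 4 digits (label ids < 10000) + NUL (1).
  const size_t label_len = 6 + 4 + 1;
  char name[label_len];
  int written = snprintf(name, label_len, "_label%d", 9999);
  assert(written == 10 && strcmp(name, "_label9999") == 0);
  // A 5-digit id would be truncated rather than overrun the arena block,
  // which is the safety the bounded call buys over strcpy + _itoa.
  return 0;
}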
@ -0,0 +1,539 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/compiler/passes/register_allocation_pass.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
using namespace alloy::hir;
|
||||
|
||||
|
||||
#define ASSERT_NO_CYCLES 0
|
||||
|
||||
|
||||
RegisterAllocationPass::RegisterAllocationPass(
|
||||
const MachineInfo* machine_info) :
|
||||
machine_info_(machine_info),
|
||||
CompilerPass() {
|
||||
// Initialize register sets.
|
||||
// TODO(benvanik): rewrite in a way that makes sense - this is terrible.
|
||||
auto mi_sets = machine_info->register_sets;
|
||||
xe_zero_struct(&usage_sets_, sizeof(usage_sets_));
|
||||
uint32_t n = 0;
|
||||
while (mi_sets[n].count) {
|
||||
auto& mi_set = mi_sets[n];
|
||||
auto usage_set = new RegisterSetUsage();
|
||||
usage_sets_.all_sets[n] = usage_set;
|
||||
usage_set->count = mi_set.count;
|
||||
usage_set->set = &mi_set;
|
||||
if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
|
||||
usage_sets_.int_set = usage_set;
|
||||
}
|
||||
if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
|
||||
usage_sets_.float_set = usage_set;
|
||||
}
|
||||
if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
|
||||
usage_sets_.vec_set = usage_set;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
RegisterAllocationPass::~RegisterAllocationPass() {
|
||||
for (size_t n = 0; n < XECOUNT(usage_sets_.all_sets); n++) {
|
||||
if (!usage_sets_.all_sets[n]) {
|
||||
break;
|
||||
}
|
||||
delete usage_sets_.all_sets[n];
|
||||
}
|
||||
}
|
||||
|
||||
int RegisterAllocationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Simple per-block allocator that operates on SSA form.
|
||||
// Registers do not move across blocks, though this could be
|
||||
// optimized with some intra-block analysis (dominators/etc).
|
||||
// Really, it'd just be nice to have someone who knew what they
|
||||
// were doing lower SSA and do this right.
|
||||
|
||||
uint32_t block_ordinal = 0;
|
||||
uint32_t instr_ordinal = 0;
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
// Sequential block ordinals.
|
||||
block->ordinal = block_ordinal++;
|
||||
|
||||
// Reset all state.
|
||||
PrepareBlockState();
|
||||
|
||||
// Renumber all instructions in the block. This is required so that
|
||||
// we can sort the usage pointers below.
|
||||
auto instr = block->instr_head;
|
||||
while (instr) {
|
||||
// Sequential global instruction ordinals.
|
||||
instr->ordinal = instr_ordinal++;
|
||||
instr = instr->next;
|
||||
}
|
||||
|
||||
instr = block->instr_head;
|
||||
while (instr) {
|
||||
const OpcodeInfo* info = instr->opcode;
|
||||
uint32_t signature = info->signature;
|
||||
|
||||
// Update the register use heaps.
|
||||
AdvanceUses(instr);
|
||||
|
||||
// Check sources for retirement. If any are unused after this instruction
|
||||
// we can eagerly evict them to speed up register allocation.
|
||||
// Since X64 (and other platforms) can often take advantage of dest==src1
|
||||
// register mappings we track retired src1 so that we can attempt to
|
||||
// reuse it.
|
||||
// NOTE: these checks require that the usage list be sorted!
|
||||
bool has_preferred_reg = false;
|
||||
RegAssignment preferred_reg = { 0 };
|
||||
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V &&
|
||||
!instr->src1.value->IsConstant()) {
|
||||
if (!instr->src1_use->next) {
|
||||
// Pull off preferred register. We will try to reuse this for the
|
||||
// dest.
|
||||
has_preferred_reg = true;
|
||||
preferred_reg = instr->src1.value->reg;
|
||||
XEASSERTNOTNULL(preferred_reg.set);
|
||||
}
|
||||
}
|
||||
|
||||
if (GET_OPCODE_SIG_TYPE_DEST(signature) == OPCODE_SIG_TYPE_V) {
|
||||
// Must not have been set already.
|
||||
XEASSERTNULL(instr->dest->reg.set);
|
||||
|
||||
// Sort the usage list. We depend on this in future uses of this variable.
|
||||
SortUsageList(instr->dest);
|
||||
|
||||
// If we have a preferred register, use that.
|
||||
// This way we can help along the stupid X86 two opcode instructions.
|
||||
bool allocated;
|
||||
if (has_preferred_reg) {
|
||||
// Allocate with the given preferred register. If the register is in
|
||||
// the wrong set it will not be reused.
|
||||
allocated = TryAllocateRegister(instr->dest, preferred_reg);
|
||||
} else {
|
||||
// Allocate a register. This will either reserve a free one or
|
||||
// spill and reuse an active one.
|
||||
allocated = TryAllocateRegister(instr->dest);
|
||||
}
|
||||
if (!allocated) {
|
||||
// Failed to allocate register -- need to spill and try again.
|
||||
// We spill only those registers we aren't using.
|
||||
if (!SpillOneRegister(builder, instr->dest->type)) {
|
||||
// Unable to spill anything - this shouldn't happen.
|
||||
XELOGE("Unable to spill any registers");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Demand allocation.
|
||||
if (!TryAllocateRegister(instr->dest)) {
|
||||
// Boned.
|
||||
XELOGE("Register allocation failed");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
instr = instr->next;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RegisterAllocationPass::DumpUsage(const char* name) {
#if 0
  fprintf(stdout, "\n%s:\n", name);
  for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
    auto usage_set = usage_sets_.all_sets[i];
    if (usage_set) {
      fprintf(stdout, "set %s:\n", usage_set->set->name);
      fprintf(stdout, "  avail: %s\n",
              usage_set->availability.to_string().c_str());
      fprintf(stdout, "  upcoming uses:\n");
      for (auto it = usage_set->upcoming_uses.begin();
           it != usage_set->upcoming_uses.end(); ++it) {
        fprintf(stdout, "    v%d, used at %d\n",
                it->value->ordinal,
                it->use->instr->ordinal);
      }
    }
  }
  fflush(stdout);
#endif
}

void RegisterAllocationPass::PrepareBlockState() {
  for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
    auto usage_set = usage_sets_.all_sets[i];
    if (usage_set) {
      usage_set->availability.set();
      usage_set->upcoming_uses.clear();
    }
  }
  DumpUsage("PrepareBlockState");
}

void RegisterAllocationPass::AdvanceUses(Instr* instr) {
  for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) {
    auto usage_set = usage_sets_.all_sets[i];
    if (!usage_set) {
      break;
    }
    auto& upcoming_uses = usage_set->upcoming_uses;
    for (auto it = upcoming_uses.begin(); it != upcoming_uses.end();) {
      if (!it->use) {
        // No uses at all - we can remove right away.
        // This comes up from instructions where the dest is never used,
        // like the ATOMIC ops.
        MarkRegAvailable(it->value->reg);
        it = upcoming_uses.erase(it);
        continue;
      }
      if (it->use->instr != instr) {
        // Not yet at this instruction.
        ++it;
        continue;
      }
      // The use is from this instruction.
      if (!it->use->next) {
        // Last use of the value. We can retire it now.
        MarkRegAvailable(it->value->reg);
        it = upcoming_uses.erase(it);
      } else {
        // Used again. Push back the next use.
        // Note that we may be used multiple times this instruction, so
        // eat those.
        auto next_use = it->use->next;
        while (next_use->next && next_use->instr == instr) {
          next_use = next_use->next;
        }
        // Remove the iterator.
        auto value = it->value;
        it = upcoming_uses.erase(it);
        upcoming_uses.emplace_back(value, next_use);
      }
    }
  }
  DumpUsage("AdvanceUses");
}

bool RegisterAllocationPass::IsRegInUse(const RegAssignment& reg) {
  RegisterSetUsage* usage_set;
  if (reg.set == usage_sets_.int_set->set) {
    usage_set = usage_sets_.int_set;
  } else if (reg.set == usage_sets_.float_set->set) {
    usage_set = usage_sets_.float_set;
  } else {
    usage_set = usage_sets_.vec_set;
  }
  return !usage_set->availability.test(reg.index);
}

RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegUsed(const RegAssignment& reg,
                                    Value* value, Value::Use* use) {
  auto usage_set = RegisterSetForValue(value);
  usage_set->availability.set(reg.index, false);
  usage_set->upcoming_uses.emplace_back(value, use);
  DumpUsage("MarkRegUsed");
  return usage_set;
}

RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::MarkRegAvailable(const hir::RegAssignment& reg) {
  RegisterSetUsage* usage_set;
  if (reg.set == usage_sets_.int_set->set) {
    usage_set = usage_sets_.int_set;
  } else if (reg.set == usage_sets_.float_set->set) {
    usage_set = usage_sets_.float_set;
  } else {
    usage_set = usage_sets_.vec_set;
  }
  usage_set->availability.set(reg.index, true);
  return usage_set;
}
bool RegisterAllocationPass::TryAllocateRegister(
    Value* value, const RegAssignment& preferred_reg) {
  // If the preferred register matches type and is available, use it.
  auto usage_set = RegisterSetForValue(value);
  if (usage_set->set == preferred_reg.set) {
    // Check if available.
    if (!IsRegInUse(preferred_reg)) {
      // Mark as in-use and return. Best case.
      MarkRegUsed(preferred_reg, value, value->use_head);
      value->reg = preferred_reg;
      return true;
    }
  }

  // Otherwise, fall back to allocating like normal.
  return TryAllocateRegister(value);
}

bool RegisterAllocationPass::TryAllocateRegister(Value* value) {
  // Get the set this register is in.
  RegisterSetUsage* usage_set = RegisterSetForValue(value);

  // Find the first free register, if any.
  // We have to ensure it's a valid one (in our count).
  unsigned long first_unused = 0;
  bool all_used =
      _BitScanForward(&first_unused, usage_set->availability.to_ulong()) == 0;
  if (!all_used && first_unused < usage_set->count) {
    // Available! Use it!
    value->reg.set = usage_set->set;
    value->reg.index = first_unused;
    MarkRegUsed(value->reg, value, value->use_head);
    return true;
  }

  // None available! Spill required.
  return false;
}
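Note that _BitScanForward is an MSVC intrinsic; in this availability mask a set bit means "free", so the scan returns the lowest-numbered free register. A portable sketch of the same find-first-free idea over std::bitset, with hypothetical names and toy sizes:

#include <bitset>
#include <cassert>
#include <cstdint>

// Portable equivalent of the _BitScanForward use above: find the lowest
// set bit in an availability mask, where set == free. Returns false when
// every register in the set (the first `count` bits) is occupied.
bool FindFirstFree(const std::bitset<32>& availability, uint32_t count,
                   uint32_t* out_index) {
  for (uint32_t i = 0; i < count; ++i) {
    if (availability.test(i)) {
      *out_index = i;
      return true;
    }
  }
  return false;
}

int main() {
  std::bitset<32> avail;
  avail.set();          // block entry: every register free
  avail.set(0, false);  // r0 allocated
  avail.set(1, false);  // r1 allocated
  uint32_t index = 0;
  assert(FindFirstFree(avail, 8, &index) && index == 2);
  return 0;
}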
bool RegisterAllocationPass::SpillOneRegister(
    HIRBuilder* builder, TypeName required_type) {
  // Get the set that we will be picking from.
  RegisterSetUsage* usage_set;
  if (required_type <= INT64_TYPE) {
    usage_set = usage_sets_.int_set;
  } else if (required_type <= FLOAT64_TYPE) {
    usage_set = usage_sets_.float_set;
  } else {
    usage_set = usage_sets_.vec_set;
  }

  DumpUsage("SpillOneRegister (pre)");
  // Pick the one with the furthest next use.
  XEASSERT(!usage_set->upcoming_uses.empty());
  auto furthest_usage = std::max_element(
      usage_set->upcoming_uses.begin(), usage_set->upcoming_uses.end(),
      RegisterUsage::Comparer());
  Value* spill_value = furthest_usage->value;
  Value::Use* prev_use = furthest_usage->use->prev;
  Value::Use* next_use = furthest_usage->use;
  XEASSERTNOTNULL(next_use);
  usage_set->upcoming_uses.erase(furthest_usage);
  DumpUsage("SpillOneRegister (post)");
  const auto reg = spill_value->reg;

  // We know the spill_value use list is sorted, so we can cut it right now.
  // This makes it easier down below.
  auto new_head_use = next_use;

  // Allocate local.
  if (spill_value->local_slot) {
    // Value is already assigned a slot. Since we allocate in order and this is
    // all SSA we know the stored value will be exactly what we want. Yay,
    // we can prevent the redundant store!
    // In fact, we may even want to pin this spilled value so that we always
    // use the spilled value and prevent the need for more locals.
  } else {
    // Allocate a local slot.
    spill_value->local_slot = builder->AllocLocal(spill_value->type);

    // Add store.
    builder->StoreLocal(spill_value->local_slot, spill_value);
    auto spill_store = builder->last_instr();
    auto spill_store_use = spill_store->src2_use;
    XEASSERTNULL(spill_store_use->prev);
    if (prev_use && prev_use->instr->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
      // Instruction is paired. This is bad. We will insert the spill after the
      // paired instruction.
      XEASSERTNOTNULL(prev_use->instr->next);
      spill_store->MoveBefore(prev_use->instr->next);

      // Update last use.
      spill_value->last_use = spill_store;
    } else if (prev_use) {
      // We insert the store immediately before the previous use.
      // If we were smarter we could then re-run allocation and reuse the
      // register once dropped.
      spill_store->MoveBefore(prev_use->instr);

      // Update last use.
      spill_value->last_use = prev_use->instr;
    } else {
      // This is the first use, so the only thing we have is the define.
      // Move the store to right after that.
      spill_store->MoveBefore(spill_value->def->next);

      // Update last use.
      spill_value->last_use = spill_store;
    }
  }

#if ASSERT_NO_CYCLES
  builder->AssertNoCycles();
  spill_value->def->block->AssertNoCycles();
#endif  // ASSERT_NO_CYCLES

  // Add load.
  // Inserted immediately before the next use. Since by definition the next
  // use is after the instruction requesting the spill we know we haven't
  // done allocation for that code yet and can let that be handled
  // automatically when we get to it.
  auto new_value = builder->LoadLocal(spill_value->local_slot);
  auto spill_load = builder->last_instr();
  spill_load->MoveBefore(next_use->instr);
  // Note: implicit first use added.

#if ASSERT_NO_CYCLES
  builder->AssertNoCycles();
  spill_value->def->block->AssertNoCycles();
#endif  // ASSERT_NO_CYCLES

  // Set the local slot of the new value to our existing one. This way we will
  // reuse that same memory if needed.
  new_value->local_slot = spill_value->local_slot;

  // Rename all future uses of the SSA value to the new value as loaded
  // from the local.
  // We can quickly do this by walking the use list. Because the list is
  // already sorted we know we are going to end up with a sorted list.
  auto walk_use = new_head_use;
  auto new_use_tail = walk_use;
  while (walk_use) {
    auto next_walk_use = walk_use->next;
    auto instr = walk_use->instr;

    uint32_t signature = instr->opcode->signature;
    if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src1.value == spill_value) {
        instr->set_src1(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src2.value == spill_value) {
        instr->set_src2(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src3.value == spill_value) {
        instr->set_src3(new_value);
      }
    }

    walk_use = next_walk_use;
    if (walk_use) {
      new_use_tail = walk_use;
    }
  }
  new_value->last_use = new_use_tail->instr;

  // Update tracking.
  MarkRegAvailable(reg);

  return true;
}
RegisterAllocationPass::RegisterSetUsage*
RegisterAllocationPass::RegisterSetForValue(
    const Value* value) {
  if (value->type <= INT64_TYPE) {
    return usage_sets_.int_set;
  } else if (value->type <= FLOAT64_TYPE) {
    return usage_sets_.float_set;
  } else {
    return usage_sets_.vec_set;
  }
}
namespace {
int CompareValueUse(const Value::Use* a, const Value::Use* b) {
  return a->instr->ordinal - b->instr->ordinal;
}
}  // namespace

void RegisterAllocationPass::SortUsageList(Value* value) {
  // Modified in-place linked list sort from:
  // http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
  if (!value->use_head) {
    return;
  }
  Value::Use* head = value->use_head;
  Value::Use* tail = nullptr;
  int insize = 1;
  while (true) {
    auto p = head;
    head = nullptr;
    tail = nullptr;
    // count number of merges we do in this pass
    int nmerges = 0;
    while (p) {
      // there exists a merge to be done
      nmerges++;
      // step 'insize' places along from p
      auto q = p;
      int psize = 0;
      for (int i = 0; i < insize; i++) {
        psize++;
        q = q->next;
        if (!q) break;
      }
      // if q hasn't fallen off end, we have two lists to merge
      int qsize = insize;
      // now we have two lists; merge them
      while (psize > 0 || (qsize > 0 && q)) {
        // decide whether next element of merge comes from p or q
        Value::Use* e = nullptr;
        if (psize == 0) {
          // p is empty; e must come from q
          e = q; q = q->next; qsize--;
        } else if (qsize == 0 || !q) {
          // q is empty; e must come from p
          e = p; p = p->next; psize--;
        } else if (CompareValueUse(p, q) <= 0) {
          // First element of p is lower (or same); e must come from p
          e = p; p = p->next; psize--;
        } else {
          // First element of q is lower; e must come from q
          e = q; q = q->next; qsize--;
        }
        // add the next element to the merged list
        if (tail) {
          tail->next = e;
        } else {
          head = e;
        }
        // Maintain reverse pointers in a doubly linked list.
        e->prev = tail;
        tail = e;
      }
      // now p has stepped 'insize' places along, and q has too
      p = q;
    }
    if (tail) {
      tail->next = nullptr;
    }
    // If we have done only one merge, we're finished
    if (nmerges <= 1) {
      // allow for nmerges==0, the empty list case
      break;
    }
    // Otherwise repeat, merging lists twice the size
    insize *= 2;
  }

  value->use_head = head;
  value->last_use = tail->instr;
}
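SortUsageList adapts Simon Tatham's bottom-up linked-list mergesort, which sorts in O(n log n) with no recursion and no auxiliary memory by merging runs of doubling size. For anyone who wants to test the control flow in isolation, here is a self-contained toy version over a singly linked list of ints (names and structure are illustrative only):

#include <cassert>
#include <initializer_list>

struct Node { int key; Node* next; };

// Bottom-up mergesort on a singly linked list (Tatham's algorithm):
// merge sorted runs of length insize, doubling insize until a single
// merge covers the whole list.
Node* SortList(Node* head) {
  if (!head) return nullptr;
  int insize = 1;
  while (true) {
    Node* p = head;
    Node* tail = nullptr;
    head = nullptr;
    int nmerges = 0;
    while (p) {
      nmerges++;
      Node* q = p;
      int psize = 0;
      for (int i = 0; i < insize && q; i++) { psize++; q = q->next; }
      int qsize = insize;
      while (psize > 0 || (qsize > 0 && q)) {
        Node* e;
        if (psize == 0)            { e = q; q = q->next; qsize--; }
        else if (qsize == 0 || !q) { e = p; p = p->next; psize--; }
        else if (p->key <= q->key) { e = p; p = p->next; psize--; }
        else                       { e = q; q = q->next; qsize--; }
        if (tail) tail->next = e; else head = e;
        tail = e;
      }
      p = q;
    }
    tail->next = nullptr;
    if (nmerges <= 1) return head;  // one merge covered everything: done
    insize *= 2;
  }
}

int main() {
  Node d{1, nullptr}, c{4, &d}, b{2, &c}, a{3, &b};
  Node* sorted = SortList(&a);  // 3,2,4,1 -> 1,2,3,4
  for (int want : {1, 2, 3, 4}) {
    assert(sorted->key == want);
    sorted = sorted->next;
  }
  return 0;
}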
@ -0,0 +1,89 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <vector>
|
||||
|
||||
#include <alloy/backend/machine_info.h>
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
|
||||
class RegisterAllocationPass : public CompilerPass {
|
||||
public:
|
||||
RegisterAllocationPass(const backend::MachineInfo* machine_info);
|
||||
virtual ~RegisterAllocationPass();
|
||||
|
||||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
// TODO(benvanik): rewrite all this set shit -- too much indirection, the
|
||||
// complexity is not needed.
|
||||
struct RegisterUsage {
|
||||
hir::Value* value;
|
||||
hir::Value::Use* use;
|
||||
RegisterUsage() : value(nullptr), use(nullptr) {}
|
||||
RegisterUsage(hir::Value* value_, hir::Value::Use* use_)
|
||||
: value(value_), use(use_) {}
|
||||
struct Comparer : std::binary_function<RegisterUsage, RegisterUsage, bool> {
|
||||
bool operator()(const RegisterUsage& a, const RegisterUsage& b) const {
|
||||
return a.use->instr->ordinal < b.use->instr->ordinal;
|
||||
}
|
||||
};
|
||||
};
|
||||
struct RegisterSetUsage {
|
||||
const backend::MachineInfo::RegisterSet* set = nullptr;
|
||||
uint32_t count = 0;
|
||||
std::bitset<32> availability = 0;
|
||||
// TODO(benvanik): another data type.
|
||||
std::vector<RegisterUsage> upcoming_uses;
|
||||
};
|
||||
|
||||
void DumpUsage(const char* name);
|
||||
void PrepareBlockState();
|
||||
void AdvanceUses(hir::Instr* instr);
|
||||
bool IsRegInUse(const hir::RegAssignment& reg);
|
||||
RegisterSetUsage* MarkRegUsed(const hir::RegAssignment& reg,
|
||||
hir::Value* value, hir::Value::Use* use);
|
||||
RegisterSetUsage* MarkRegAvailable(const hir::RegAssignment& reg);
|
||||
|
||||
bool TryAllocateRegister(hir::Value* value,
|
||||
const hir::RegAssignment& preferred_reg);
|
||||
bool TryAllocateRegister(hir::Value* value);
|
||||
bool SpillOneRegister(hir::HIRBuilder* builder, hir::TypeName required_type);
|
||||
|
||||
RegisterSetUsage* RegisterSetForValue(const hir::Value* value);
|
||||
|
||||
void SortUsageList(hir::Value* value);
|
||||
|
||||
private:
|
||||
const backend::MachineInfo* machine_info_;
|
||||
struct {
|
||||
RegisterSetUsage* int_set = nullptr;
|
||||
RegisterSetUsage* float_set = nullptr;
|
||||
RegisterSetUsage* vec_set = nullptr;
|
||||
RegisterSetUsage* all_sets[3];
|
||||
} usage_sets_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_
|
|
@@ -23,6 +23,8 @@ SimplificationPass::~SimplificationPass() {
}

int SimplificationPass::Run(HIRBuilder* builder) {
+  SCOPE_profile_cpu_f("alloy");
+
  EliminateConversions(builder);
  SimplifyAssignments(builder);
  return 0;
@@ -5,14 +5,22 @@
    'constant_propagation_pass.h',
    'context_promotion_pass.cc',
    'context_promotion_pass.h',
    'control_flow_analysis_pass.cc',
    'control_flow_analysis_pass.h',
    'data_flow_analysis_pass.cc',
    'data_flow_analysis_pass.h',
    'dead_code_elimination_pass.cc',
    'dead_code_elimination_pass.h',
    'finalization_pass.cc',
    'finalization_pass.h',
    #'dead_store_elimination_pass.cc',
    #'dead_store_elimination_pass.h',
    'register_allocation_pass.cc',
    'register_allocation_pass.h',
    'simplification_pass.cc',
    'simplification_pass.h',
    'validation_pass.cc',
    'validation_pass.h',
    'value_reduction_pass.cc',
    'value_reduction_pass.h',
  ],
@ -0,0 +1,101 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <alloy/compiler/passes/validation_pass.h>
|
||||
|
||||
#include <alloy/backend/backend.h>
|
||||
#include <alloy/compiler/compiler.h>
|
||||
#include <alloy/runtime/runtime.h>
|
||||
|
||||
using namespace alloy;
|
||||
using namespace alloy::backend;
|
||||
using namespace alloy::compiler;
|
||||
using namespace alloy::compiler::passes;
|
||||
using namespace alloy::frontend;
|
||||
using namespace alloy::hir;
|
||||
using namespace alloy::runtime;
|
||||
|
||||
|
||||
ValidationPass::ValidationPass() :
|
||||
CompilerPass() {
|
||||
}
|
||||
|
||||
ValidationPass::~ValidationPass() {
|
||||
}
|
||||
|
||||
int ValidationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
StringBuffer str;
|
||||
builder->Dump(&str);
|
||||
printf(str.GetString());
|
||||
fflush(stdout);
|
||||
str.Reset();
|
||||
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
auto label = block->label_head;
|
||||
while (label) {
|
||||
XEASSERT(label->block == block);
|
||||
if (label->block != block) {
|
||||
return 1;
|
||||
}
|
||||
label = label->next;
|
||||
}
|
||||
|
||||
auto instr = block->instr_head;
|
||||
while (instr) {
|
||||
if (ValidateInstruction(block, instr)) {
|
||||
return 1;
|
||||
}
|
||||
instr = instr->next;
|
||||
}
|
||||
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ValidationPass::ValidateInstruction(Block* block, Instr* instr) {
|
||||
XEASSERT(instr->block == block);
|
||||
if (instr->block != block) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t signature = instr->opcode->signature;
|
||||
if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
|
||||
if (ValidateValue(block, instr, instr->src1.value)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
|
||||
if (ValidateValue(block, instr, instr->src2.value)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
|
||||
if (ValidateValue(block, instr, instr->src3.value)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) {
|
||||
//if (value->def) {
|
||||
// auto def = value->def;
|
||||
// XEASSERT(def->block == block);
|
||||
// if (def->block != block) {
|
||||
// return 1;
|
||||
// }
|
||||
//}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_
|
||||
#define ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_
|
||||
|
||||
#include <alloy/compiler/compiler_pass.h>
|
||||
|
||||
|
||||
namespace alloy {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
|
||||
class ValidationPass : public CompilerPass {
|
||||
public:
|
||||
ValidationPass();
|
||||
virtual ~ValidationPass();
|
||||
|
||||
virtual int Run(hir::HIRBuilder* builder);
|
||||
|
||||
private:
|
||||
int ValidateInstruction(hir::Block* block, hir::Instr* instr);
|
||||
int ValidateValue(hir::Block* block, hir::Instr* instr, hir::Value* value);
|
||||
};
|
||||
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace alloy
|
||||
|
||||
|
||||
#endif // ALLOY_COMPILER_PASSES_VALIDATION_PASS_H_
|
|
@@ -13,7 +13,11 @@
#include <alloy/compiler/compiler.h>
#include <alloy/runtime/runtime.h>

-#include <bitset>
+#pragma warning(push)
+#pragma warning(disable : 4244)
+#pragma warning(disable : 4267)
+#include <llvm/ADT/BitVector.h>
+#pragma warning(pop)

using namespace alloy;
using namespace alloy::backend;

@@ -49,10 +53,11 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
}

int ValueReductionPass::Run(HIRBuilder* builder) {
+  SCOPE_profile_cpu_f("alloy");
+
  // Walk each block and reuse variable ordinals as much as possible.

-  // Let's hope this is enough.
-  std::bitset<1024> ordinals;
+  llvm::BitVector ordinals(builder->max_value_ordinal());

  auto block = builder->first_block();
  while (block) {
@@ -74,34 +79,40 @@ int ValueReductionPass::Run(HIRBuilder* builder) {
      OpcodeSignatureType src1_type = GET_OPCODE_SIG_TYPE_SRC1(info->signature);
      OpcodeSignatureType src2_type = GET_OPCODE_SIG_TYPE_SRC2(info->signature);
      OpcodeSignatureType src3_type = GET_OPCODE_SIG_TYPE_SRC3(info->signature);
-      if (src1_type == OPCODE_SIG_TYPE_V && !instr->src1.value->IsConstant()) {
+      if (src1_type == OPCODE_SIG_TYPE_V) {
        auto v = instr->src1.value;
        if (!v->last_use) {
          ComputeLastUse(v);
        }
        if (v->last_use == instr) {
          // Available.
-          ordinals.set(v->ordinal, false);
+          if (!instr->src1.value->IsConstant()) {
+            ordinals.reset(v->ordinal);
+          }
        }
      }
-      if (src2_type == OPCODE_SIG_TYPE_V && !instr->src2.value->IsConstant()) {
+      if (src2_type == OPCODE_SIG_TYPE_V) {
        auto v = instr->src2.value;
        if (!v->last_use) {
          ComputeLastUse(v);
        }
        if (v->last_use == instr) {
          // Available.
-          ordinals.set(v->ordinal, false);
+          if (!instr->src2.value->IsConstant()) {
+            ordinals.reset(v->ordinal);
+          }
        }
      }
-      if (src3_type == OPCODE_SIG_TYPE_V && !instr->src3.value->IsConstant()) {
+      if (src3_type == OPCODE_SIG_TYPE_V) {
        auto v = instr->src3.value;
        if (!v->last_use) {
          ComputeLastUse(v);
        }
        if (v->last_use == instr) {
          // Available.
-          ordinals.set(v->ordinal, false);
+          if (!instr->src3.value->IsConstant()) {
+            ordinals.reset(v->ordinal);
+          }
        }
      }
      if (dest_type == OPCODE_SIG_TYPE_V) {
@@ -109,7 +120,7 @@ int ValueReductionPass::Run(HIRBuilder* builder) {
        //     source value ordinal.
        auto v = instr->dest;
        // Find a lower ordinal.
-        for (auto n = 0; n < ordinals.size(); n++) {
+        for (auto n = 0u; n < ordinals.size(); n++) {
          if (!ordinals.test(n)) {
            ordinals.set(n);
            v->ordinal = n;
@@ -27,10 +27,10 @@ public:
    ALLOY_COMPILER_DEINIT = ALLOY_COMPILER | (2),
  };

-  typedef struct {
+  typedef struct Init_s {
    static const uint32_t event_type = ALLOY_COMPILER_INIT;
  } Init;
-  typedef struct {
+  typedef struct Deinit_s {
    static const uint32_t event_type = ALLOY_COMPILER_DEINIT;
  } Deinit;
};
@@ -44,7 +44,33 @@ typedef struct XECACHEALIGN vec128_s {
      uint64_t high;
    };
  };

+  bool operator== (const vec128_s& b) const {
+    return low == b.low && high == b.high;
+  }
} vec128_t;
+XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
+  vec128_t v;
+  v.i4[0] = x; v.i4[1] = y; v.i4[2] = z; v.i4[3] = w;
+  return v;
+}
+XEFORCEINLINE vec128_t vec128f(float x, float y, float z, float w) {
+  vec128_t v;
+  v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w;
+  return v;
+}
+XEFORCEINLINE vec128_t vec128b(
+    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
+    uint8_t y0, uint8_t y1, uint8_t y2, uint8_t y3,
+    uint8_t z0, uint8_t z1, uint8_t z2, uint8_t z3,
+    uint8_t w0, uint8_t w1, uint8_t w2, uint8_t w3) {
+  vec128_t v;
+  v.b16[0] = x3; v.b16[1] = x2; v.b16[2] = x1; v.b16[3] = x0;
+  v.b16[4] = y3; v.b16[5] = y2; v.b16[6] = y1; v.b16[7] = y0;
+  v.b16[8] = z3; v.b16[9] = z2; v.b16[10] = z1; v.b16[11] = z0;
+  v.b16[12] = w3; v.b16[13] = w2; v.b16[14] = w1; v.b16[15] = w0;
+  return v;
+}

}  // namespace alloy
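The vec128b byte order is deliberately swizzled (the first byte argument of each lane lands at the highest byte index within that lane), which lines up big-endian guest data with little-endian host lanes. A small self-contained check of that layout, using a stand-in union that mirrors vec128_t rather than the real type:

#include <cassert>
#include <cstdint>

// Minimal stand-in mirroring the vec128_t union layout above, so the byte
// swizzle can be verified outside the tree.
union MiniVec128 {
  uint32_t i4[4];
  uint8_t b16[16];
};

MiniVec128 MiniVec128b(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3) {
  MiniVec128 v = {};
  // Same ordering as vec128b: first byte argument of the lane goes to the
  // highest byte index within that lane.
  v.b16[0] = x3; v.b16[1] = x2; v.b16[2] = x1; v.b16[3] = x0;
  return v;
}

int main() {
  MiniVec128 c = MiniVec128b(0x00, 0x01, 0x02, 0x03);
  assert(c.b16[0] == 0x03 && c.b16[3] == 0x00);
  // Read back as a little-endian uint32 this is 0x00010203, i.e. the
  // guest's big-endian byte order within the lane.
  assert(c.i4[0] == 0x00010203u);
  return 0;
}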
@@ -11,6 +11,7 @@
#define ALLOY_DELEGATE_H_

#include <functional>
+#include <vector>

#include <alloy/core.h>
#include <alloy/mutex.h>
@@ -67,6 +67,8 @@ typedef struct XECACHEALIGN64 PPCContext_s {
  // Must be stored at 0x0 for now.
  // TODO(benvanik): find a nice way to describe this to the JIT.
  runtime::ThreadState* thread_state;
+  // TODO(benvanik): this is getting nasty. Must be here.
+  uint8_t* membase;

  // Most frequently used registers first.
  uint64_t r[32];  // General purpose registers

@@ -196,7 +198,6 @@ typedef struct XECACHEALIGN64 PPCContext_s {

  // Runtime-specific data pointer. Used on callbacks to get access to the
  // current runtime and its data.
-  uint8_t* membase;
  runtime::Runtime* runtime;
  volatile int suspend_flag;
@@ -115,7 +115,7 @@ void Disasm_X_RA_RB(InstrData& i, StringBuffer* str) {
      i.X.RA, i.X.RB);
}
void Disasm_XO_RT_RA_RB(InstrData& i, StringBuffer* str) {
-  str->Append("%*s%s%s r%d, r%d", i.XO.Rc ? -7 : -8, i.type->name,
+  str->Append("%*s%s%s r%d, r%d, r%d", i.XO.Rc ? -7 : -8, i.type->name,
              i.XO.OE ? "o" : "", i.XO.Rc ? "." : "",
              i.XO.RT, i.XO.RA, i.XO.RB);
}

@@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) {
}

void Disasm_fcmp(InstrData& i, StringBuffer* str) {
-  str->Append("%-8s cr%d, r%d, r%d", i.type->name,
+  str->Append("%-8s cr%d, f%d, f%d", i.type->name,
              i.X.RT >> 2, i.X.RA, i.X.RB);
}
@@ -105,6 +105,10 @@ Value* CalculateEA_0(PPCHIRBuilder& f, uint32_t ra, uint32_t rb);

// }

+unsigned int xerotl(unsigned int value, unsigned int shift) {
+  XEASSERT(shift < 32);
+  return shift == 0 ? value : ((value << shift) | (value >> (32 - shift)));
+}

XEEMITTER(dst, 0x7C0002AC, XDSS)(PPCHIRBuilder& f, InstrData& i) {
  XEINSTRNOTIMPLEMENTED();

@@ -1797,7 +1801,7 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, InstrData& i) {
  // http://hlssmod.net/he_code/public/pixelwriter.h
  // control = prev:0123 | new:4567
  uint32_t control = 0x00010203;  // original
-  uint32_t src = _rotl(0x04050607, shift * 8);
+  uint32_t src = xerotl(0x04050607, shift * 8);
  uint32_t mask = 0;
  switch (pack) {
    case 1:  // VPACK_32
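xerotl is a portable replacement for MSVC's _rotl intrinsic; the shift == 0 guard avoids the undefined right shift by 32 in value >> (32 - 0). A quick standalone check of both paths:

#include <cassert>
#include <cstdint>

// Portable 32-bit rotate-left, mirroring the xerotl definition above.
uint32_t rotl32(uint32_t value, unsigned int shift) {
  return shift == 0 ? value
                    : ((value << shift) | (value >> (32 - shift)));
}

int main() {
  // The vpkd3d128 case: rotate the byte-selector constant by whole bytes.
  assert(rotl32(0x04050607u, 8) == 0x05060704u);
  assert(rotl32(0x04050607u, 0) == 0x04050607u);  // guard path
  return 0;
}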
@@ -643,20 +643,20 @@ XEEMITTER(cmpli, 0x28000000, D )(PPCHIRBuilder& f, InstrData& i) {
XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA <- (RS) & (RB)
  Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

XEEMITTER(andcx, 0x7C000078, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA <- (RS) & ¬(RB)
  Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB)));
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -665,8 +665,8 @@ XEEMITTER(andix, 0x70000000, D )(PPCHIRBuilder& f, InstrData& i) {
  Value* ra = f.And(
      f.LoadGPR(i.D.RT),
      f.LoadConstant((uint64_t)i.D.DS));
  f.UpdateCR(0, ra);
  f.StoreGPR(i.D.RA, ra);
  f.UpdateCR(0, ra);
  return 0;
}

@@ -675,8 +675,8 @@ XEEMITTER(andisx, 0x74000000, D )(PPCHIRBuilder& f, InstrData& i) {
  Value* ra = f.And(
      f.LoadGPR(i.D.RT),
      f.LoadConstant((uint64_t(i.D.DS) << 16)));
  f.UpdateCR(0, ra);
  f.StoreGPR(i.D.RA, ra);
  f.UpdateCR(0, ra);
  return 0;
}

@@ -688,10 +688,10 @@ XEEMITTER(cntlzdx, 0x7C000074, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA <- n
  Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT));
  v = f.ZeroExtend(v, INT64_TYPE);
  f.StoreGPR(i.X.RA, v);
  if (i.X.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.X.RA, v);
  return 0;
}

@@ -704,10 +704,10 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.CountLeadingZeros(
      f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
  v = f.ZeroExtend(v, INT64_TYPE);
  f.StoreGPR(i.X.RA, v);
  if (i.X.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.X.RA, v);
  return 0;
}

@@ -715,10 +715,10 @@ XEEMITTER(eqvx, 0x7C000238, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA <- (RS) == (RB)
  Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
  ra = f.Not(ra);
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -728,10 +728,10 @@ XEEMITTER(extsbx, 0x7C000774, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA[0:55] <- i56.s
  Value* rt = f.LoadGPR(i.X.RT);
  rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE);
  f.StoreGPR(i.X.RA, rt);
  if (i.X.Rc) {
    f.UpdateCR(0, rt);
  }
  f.StoreGPR(i.X.RA, rt);
  return 0;
}

@@ -741,10 +741,10 @@ XEEMITTER(extshx, 0x7C000734, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA[0:47] <- 48.s
  Value* rt = f.LoadGPR(i.X.RT);
  rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE);
  f.StoreGPR(i.X.RA, rt);
  if (i.X.Rc) {
    f.UpdateCR(0, rt);
  }
  f.StoreGPR(i.X.RA, rt);
  return 0;
}

@@ -754,10 +754,10 @@ XEEMITTER(extswx, 0x7C0007B4, X )(PPCHIRBuilder& f, InstrData& i) {
  // RA[0:31] <- i32.s
  Value* rt = f.LoadGPR(i.X.RT);
  rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE);
  f.StoreGPR(i.X.RA, rt);
  if (i.X.Rc) {
    f.UpdateCR(0, rt);
  }
  f.StoreGPR(i.X.RA, rt);
  return 0;
}

@@ -767,10 +767,10 @@ XEEMITTER(nandx, 0x7C0003B8, X )(PPCHIRBuilder& f, InstrData& i) {
      f.LoadGPR(i.X.RT),
      f.LoadGPR(i.X.RB));
  ra = f.Not(ra);
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -780,10 +780,10 @@ XEEMITTER(norx, 0x7C0000F8, X )(PPCHIRBuilder& f, InstrData& i) {
      f.LoadGPR(i.X.RT),
      f.LoadGPR(i.X.RB));
  ra = f.Not(ra);
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -803,10 +803,10 @@ XEEMITTER(orx, 0x7C000378, X )(PPCHIRBuilder& f, InstrData& i) {
        f.LoadGPR(i.X.RT),
        f.LoadGPR(i.X.RB));
  }
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -815,10 +815,10 @@ XEEMITTER(orcx, 0x7C000338, X )(PPCHIRBuilder& f, InstrData& i) {
  Value* ra = f.Or(
      f.LoadGPR(i.X.RT),
      f.Not(f.LoadGPR(i.X.RB)));
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -849,10 +849,10 @@ XEEMITTER(xorx, 0x7C000278, X )(PPCHIRBuilder& f, InstrData& i) {
  Value* ra = f.Xor(
      f.LoadGPR(i.X.RT),
      f.LoadGPR(i.X.RB));
  f.StoreGPR(i.X.RA, ra);
  if (i.X.Rc) {
    f.UpdateCR(0, ra);
  }
  f.StoreGPR(i.X.RA, ra);
  return 0;
}

@@ -895,10 +895,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
    if (m != 0xFFFFFFFFFFFFFFFF) {
      v = f.And(v, f.LoadConstant(m));
    }
    f.StoreGPR(i.MD.RA, v);
    if (i.MD.Rc) {
      f.UpdateCR(0, v);
    }
    f.StoreGPR(i.MD.RA, v);
    return 0;
  } else if (i.MD.idx == 1) {
    // XEEMITTER(rldicrx, 0x78000004, MD )

@@ -922,10 +922,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
        v = f.And(v, f.LoadConstant(m));
      }
    }
    f.StoreGPR(i.MD.RA, v);
    if (i.MD.Rc) {
      f.UpdateCR(0, v);
    }
    f.StoreGPR(i.MD.RA, v);
    return 0;
  } else if (i.MD.idx == 2) {
    // XEEMITTER(rldicx, 0x78000008, MD )

@@ -959,10 +959,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) {
          f.And(v, f.LoadConstant(m)),
          f.And(ra, f.LoadConstant(~m)));
    }
    f.StoreGPR(i.MD.RA, v);
    if (i.MD.Rc) {
      f.UpdateCR(0, v);
    }
    f.StoreGPR(i.MD.RA, v);
    return 0;
  } else {
    XEINSTRNOTIMPLEMENTED();

@@ -987,10 +987,10 @@ XEEMITTER(rlwimix, 0x50000000, M )(PPCHIRBuilder& f, InstrData& i) {
  }
  v = f.ZeroExtend(v, INT64_TYPE);
  v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m))));
  f.StoreGPR(i.M.RA, v);
  if (i.M.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.M.RA, v);
  return 0;
}

@@ -1014,10 +1014,10 @@ XEEMITTER(rlwinmx, 0x54000000, M )(PPCHIRBuilder& f, InstrData& i) {
    v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
  }
  v = f.ZeroExtend(v, INT64_TYPE);
  f.StoreGPR(i.M.RA, v);
  if (i.M.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.M.RA, v);
  return 0;
}

@@ -1027,7 +1027,8 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
  // m <- MASK(MB+32, ME+32)
  // RA <- r & m
  Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE);
  Value* sh = f.And(f.LoadGPR(i.M.SH), f.LoadConstant(0x1F));
  Value* sh = f.And(f.Truncate(f.LoadGPR(i.M.SH), INT32_TYPE),
                    f.LoadConstant(0x1F));
  v = f.RotateLeft(v, sh);
  // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here
  // as our truncation/zero-extend does it for us.

@@ -1035,10 +1036,10 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) {
    v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32)));
  }
  v = f.ZeroExtend(v, INT64_TYPE);
  f.StoreGPR(i.M.RA, v);
  if (i.M.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.M.RA, v);
  return 0;
}

@@ -1145,7 +1146,7 @@ XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
  // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
  // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
  // We already have ca set to indicate the pos 63 bit, now just and in sign.
  ca = f.And(ca, f.Shr(v, 63));
  ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));

  f.StoreCA(ca);
  f.StoreGPR(i.X.RA, v);

@@ -1173,15 +1174,15 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
  XEASSERT(sh);
  uint64_t mask = XEMASK(64 - sh, 63);
  Value* ca = f.And(
      f.Shr(v, 63),
      f.Truncate(f.Shr(v, 63), INT8_TYPE),
      f.IsTrue(f.And(v, f.LoadConstant(mask))));
  f.StoreCA(ca);

  v = f.Sha(v, sh);
  f.StoreGPR(i.XS.RA, v);
  if (i.XS.Rc) {
    f.UpdateCR(0, v);
  }
  f.StoreGPR(i.XS.RA, v);
  return 0;
}

@@ -1197,12 +1198,12 @@ XEEMITTER(srawx, 0x7C000630, X )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
  Value* sh = f.And(
      f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE),
      f.LoadConstant((int8_t)0x7F));
      f.LoadConstant(0x7F));
  // CA is set if any bits are shifted out of the right and if the result
  // is negative.
  Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
  Value* ca = f.And(
      f.Shr(v, 31),
      f.Truncate(f.Shr(v, 31), INT8_TYPE),
      f.IsTrue(f.And(v, mask)));
  f.StoreCA(ca);
  v = f.Sha(v, sh),

@@ -1234,8 +1235,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCHIRBuilder& f, InstrData& i) {
  // is negative.
  uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63);
  ca = f.And(
      f.Shr(v, 31),
      f.ZeroExtend(f.IsTrue(f.And(v, f.LoadConstant(mask))), INT32_TYPE));
      f.Truncate(f.Shr(v, 31), INT8_TYPE),
      f.IsTrue(f.And(v, f.LoadConstant(mask))));

  v = f.Sha(v, (int8_t)i.X.RB),
  v = f.SignExtend(v, INT64_TYPE);

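Every record-form (Rc=1) emitter above follows the same shape: compute the result, store it to RA, and update CR0 from a signed compare of the result against zero. A standalone model of that CR0 update, with names of my own choosing (the real UpdateCR stores the bits into the context, as a later hunk in this diff shows):

```cpp
#include <cstdint>
#include <cstdio>

// Model of a PowerPC CR0 update for a record-form ALU op: LT/GT/EQ come from
// a signed compare of the 64-bit result against zero. The fourth bit, SO,
// is copied from XER and is not modeled here.
struct CR0 { bool lt, gt, eq; };

CR0 UpdateCR0(int64_t result) {
  return CR0{result < 0, result > 0, result == 0};
}

int main() {
  CR0 cr = UpdateCR0(-1);
  printf("lt=%d gt=%d eq=%d\n", cr.lt, cr.gt, cr.eq);  // lt=1 gt=0 eq=0
  return 0;
}
```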
@@ -35,6 +35,7 @@ int InstrEmit_branch(
  // be correct for returns.
  if (lk) {
    Value* return_address = f.LoadConstant(cia + 4);
    f.SetReturnAddress(return_address);
    f.StoreLR(return_address);
  }

@@ -104,6 +105,10 @@ int InstrEmit_branch(
  //   // TODO(benvanik): evaluate hint here.
  //   c.je(e.GetReturnLabel(), kCondHintLikely);
  //}
#if 0
  // This breaks longjump, as that uses blr with a non-return lr.
  // It'd be nice to move SET_RETURN_ADDRESS semantics up into context
  // so that we can just use this.
  if (!lk && nia_is_lr) {
    // Return (most likely).
    // TODO(benvanik): test? ReturnCheck()?

@@ -116,7 +121,14 @@ int InstrEmit_branch(
      f.Return();
    }
  } else {
#else
  {
#endif
    // Jump to pointer.
    bool likely_return = !lk && nia_is_lr;
    if (likely_return) {
      call_flags |= CALL_POSSIBLE_RETURN;
    }
    if (cond) {
      if (!expect_true) {
        cond = f.IsFalse(cond);

@@ -380,8 +392,8 @@ XEEMITTER(mcrf, 0x4C000000, XL )(PPCHIRBuilder& f, InstrData& i) {
// System linkage (A-24)

XEEMITTER(sc, 0x44000002, SC )(PPCHIRBuilder& f, InstrData& i) {
  XEINSTRNOTIMPLEMENTED();
  return 1;
  f.CallExtern(f.symbol_info());
  return 0;
}

@@ -891,7 +891,8 @@ XEEMITTER(stfiwx, 0x7C0007AE, X )(PPCHIRBuilder& f, InstrData& i) {
  // EA <- b + (RB)
  // MEM(EA, 4) <- (FRS)[32:63]
  Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
  f.Store(ea, f.ByteSwap(f.Cast(f.LoadFPR(i.X.RT), INT32_TYPE)));
  f.Store(ea, f.ByteSwap(
      f.Truncate(f.Cast(f.LoadFPR(i.X.RT), INT64_TYPE), INT32_TYPE)));
  return 0;
}

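The stfiwx fix above matters because Cast only reinterprets bits: casting a 64-bit FPR straight to INT32 was ill-typed, so the new code casts to INT64 first and then truncates. A standalone model of the store, using my own names and the GCC/Clang `__builtin_bswap32` in place of the HIR ByteSwap:

```cpp
#include <cstdint>
#include <cstring>

// Model of stfiwx: reinterpret the 64-bit FPR bits (Cast), keep the low
// 32 bits (Truncate), byte-swap to the guest's big-endian order (ByteSwap),
// and store 4 bytes at the effective address.
void StoreFPRLow32(uint8_t* ea, double frs) {
  uint64_t bits;
  std::memcpy(&bits, &frs, sizeof(bits));        // Cast: same bits, new type
  uint32_t low32 = static_cast<uint32_t>(bits);  // Truncate to INT32
  uint32_t swapped = __builtin_bswap32(low32);   // ByteSwap
  std::memcpy(ea, &swapped, sizeof(swapped));    // MEM(EA, 4) <- (FRS)[32:63]
}
```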
@@ -9,6 +9,7 @@
#include <alloy/frontend/ppc/ppc_hir_builder.h>

#include <alloy/alloy-private.h>
#include <alloy/frontend/tracing.h>
#include <alloy/frontend/ppc/ppc_context.h>
#include <alloy/frontend/ppc/ppc_disasm.h>

@@ -43,6 +44,8 @@ void PPCHIRBuilder::Reset() {
}

int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
  SCOPE_profile_cpu_f("alloy");

  Memory* memory = frontend_->memory();
  const uint8_t* p = memory->membase();

@@ -125,10 +128,10 @@ int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
    typedef int (*InstrEmitter)(PPCHIRBuilder& f, InstrData& i);
    InstrEmitter emit = (InstrEmitter)i.type->emit;

    /*if (i.address == FLAGS_break_on_instruction) {
    if (i.address == FLAGS_break_on_instruction) {
      Comment("--break-on-instruction target");
      DebugBreak();
    }*/
    }

    if (!i.type->emit || emit(*this, i)) {
      XELOGCPU("Unimplemented instr %.8X %.8X %s",

@@ -239,18 +242,18 @@ void PPCHIRBuilder::UpdateCR(

void PPCHIRBuilder::UpdateCR(
    uint32_t n, Value* lhs, Value* rhs, bool is_signed) {
  Value* lt;
  Value* gt;
  if (is_signed) {
    lt = CompareSLT(lhs, rhs);
    gt = CompareSGT(lhs, rhs);
    Value* lt = CompareSLT(lhs, rhs);
    StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
    Value* gt = CompareSGT(lhs, rhs);
    StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
  } else {
    lt = CompareULT(lhs, rhs);
    gt = CompareUGT(lhs, rhs);
    Value* lt = CompareULT(lhs, rhs);
    StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
    Value* gt = CompareUGT(lhs, rhs);
    StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
  }
  Value* eq = CompareEQ(lhs, rhs);
  StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt);
  StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt);
  StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq);

  // Value* so = AllocValue(UINT8_TYPE);

@@ -279,6 +282,7 @@ Value* PPCHIRBuilder::LoadCA() {
}

void PPCHIRBuilder::StoreCA(Value* value) {
  XEASSERT(value->type == INT8_TYPE);
  StoreContext(offsetof(PPCContext, xer_ca), value);
}

@@ -287,6 +291,7 @@ Value* PPCHIRBuilder::LoadSAT() {
}

void PPCHIRBuilder::StoreSAT(Value* value) {
  value = Truncate(value, INT8_TYPE);
  StoreContext(offsetof(PPCContext, vscr_sat), value);
}

@@ -38,6 +38,8 @@ bool PPCScanner::IsRestGprLr(uint64_t address) {
}

int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
  SCOPE_profile_cpu_f("alloy");

  // This is a simple basic block analyzer. It walks the start address to the
  // end address looking for branches. Each span of instructions between
  // branches is considered a basic block. When the last blr (that has no

@@ -286,6 +288,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
}

std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) {
  SCOPE_profile_cpu_f("alloy");

  Memory* memory = frontend_->memory();
  const uint8_t* p = memory->membase();

@@ -38,20 +38,38 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) :
  assembler_ = backend->CreateAssembler();
  assembler_->Initialize();

  bool validate = FLAGS_validate_hir;

  // Build the CFG first.
  compiler_->AddPass(new passes::ControlFlowAnalysisPass());

  // Passes are executed in the order they are added. Multiple of the same
  // pass type may be used.
  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::ContextPromotionPass());
  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::SimplificationPass());
  // TODO(benvanik): run repeatedly?
  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::ConstantPropagationPass());
  //compiler_->AddPass(new passes::TypePropagationPass());
  //compiler_->AddPass(new passes::ByteSwapEliminationPass());
  if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::SimplificationPass());
  if (validate) compiler_->AddPass(new passes::ValidationPass());
  //compiler_->AddPass(new passes::DeadStoreEliminationPass());
  //if (validate) compiler_->AddPass(new passes::ValidationPass());
  compiler_->AddPass(new passes::DeadCodeEliminationPass());
  if (validate) compiler_->AddPass(new passes::ValidationPass());

  // Removes all unneeded variables. Try not to add new ones after this.
  compiler_->AddPass(new passes::ValueReductionPass());
  //// Removes all unneeded variables. Try not to add new ones after this.
  //compiler_->AddPass(new passes::ValueReductionPass());
  //if (validate) compiler_->AddPass(new passes::ValidationPass());

  // Register allocation for the target backend.
  // Will modify the HIR to add loads/stores.
  // This should be the last pass before finalization, as after this all
  // registers are assigned and ready to be emitted.
  compiler_->AddPass(new passes::RegisterAllocationPass(
      backend->machine_info()));
  if (validate) compiler_->AddPass(new passes::ValidationPass());

  // Must come last. The HIR is not really HIR after this.
  compiler_->AddPass(new passes::FinalizationPass());

@@ -68,6 +86,8 @@ int PPCTranslator::Translate(
    FunctionInfo* symbol_info,
    uint32_t debug_info_flags,
    Function** out_function) {
  SCOPE_profile_cpu_f("alloy");

  // Scan the function to find its extents. We only need to do this if we
  // haven't already been provided with them from some other source.
  if (!symbol_info->has_end_address()) {

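The pass list above interleaves an optional ValidationPass after each transform when FLAGS_validate_hir is set, so a pass that corrupts the HIR is caught immediately rather than several passes later. A hedged sketch of that pattern with hypothetical Pass and PassManager types (not Alloy's real classes):

```cpp
#include <memory>
#include <vector>

struct HIRFunction {};  // stand-in for the IR being transformed

struct Pass {
  virtual ~Pass() = default;
  virtual bool Run(HIRFunction* f) = 0;  // returns false on failure
};

// Passes run strictly in the order added; when validation is enabled, a
// validator is queued right after each transform, mirroring the repeated
// `if (validate) compiler_->AddPass(new passes::ValidationPass());` above.
class PassManager {
 public:
  explicit PassManager(bool validate) : validate_(validate) {}
  void Add(std::unique_ptr<Pass> pass, std::unique_ptr<Pass> validator) {
    passes_.push_back(std::move(pass));
    if (validate_ && validator) passes_.push_back(std::move(validator));
  }
  bool RunAll(HIRFunction* f) {
    for (auto& p : passes_) {
      if (!p->Run(f)) return false;  // stop at the first broken pass
    }
    return true;
  }

 private:
  bool validate_;
  std::vector<std::unique_ptr<Pass>> passes_;
};
```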
@@ -27,10 +27,10 @@ public:
    ALLOY_FRONTEND_DEINIT = ALLOY_FRONTEND | (2),
  };

  typedef struct {
  typedef struct Init_s {
    static const uint32_t event_type = ALLOY_FRONTEND_INIT;
  } Init;
  typedef struct {
  typedef struct Deinit_s {
    static const uint32_t event_type = ALLOY_FRONTEND_DEINIT;
  } Deinit;
};

@@ -0,0 +1,39 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <alloy/hir/block.h>

#include <alloy/hir/instr.h>

using namespace alloy;
using namespace alloy::hir;


void Block::AssertNoCycles() {
  Instr* hare = instr_head;
  Instr* tortoise = instr_head;
  if (!hare) {
    return;
  }
  while (hare = hare->next) {
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
    }
    hare = hare->next;
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
    }
    tortoise = tortoise->next;
    if (!hare || !tortoise) {
      return;
    }
  }
}

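Block::AssertNoCycles above is Floyd's tortoise-and-hare check: the hare advances two links per loop iteration while the tortoise advances one, so if the instruction list ever loops, the hare must eventually land on the tortoise. The same idea over a minimal node type (names mine, illustrative only):

```cpp
#include <cassert>

struct Node {
  Node* next = nullptr;
};

// Floyd's cycle detection: true if the list starting at head loops. The fast
// pointer moves two hops per iteration, the slow pointer one; in a cyclic
// list the fast pointer must eventually meet the slow one.
bool HasCycle(const Node* head) {
  const Node* slow = head;
  const Node* fast = head;
  while (fast && fast->next) {
    slow = slow->next;
    fast = fast->next->next;
    if (slow == fast) return true;
  }
  return false;
}

int main() {
  Node a, b, c;
  a.next = &b;
  b.next = &c;
  assert(!HasCycle(&a));
  c.next = &b;  // introduce a loop
  assert(HasCycle(&a));
  return 0;
}
```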
@@ -12,15 +12,37 @@

#include <alloy/core.h>

XEDECLARECLASS1(llvm, BitVector);


namespace alloy {
namespace hir {

class Block;
class HIRBuilder;
class Instr;
class Label;


class Edge {
public:
  enum EdgeFlags {
    UNCONDITIONAL = (1 << 0),
    DOMINATES     = (1 << 1),
  };
public:
  Edge* outgoing_next;
  Edge* outgoing_prev;
  Edge* incoming_next;
  Edge* incoming_prev;

  Block* src;
  Block* dest;

  uint32_t flags;
};


class Block {
public:
  Arena* arena;

@@ -28,6 +50,10 @@ public:
  Block* next;
  Block* prev;

  Edge* incoming_edge_head;
  Edge* outgoing_edge_head;
  llvm::BitVector* incoming_values;

  Label* label_head;
  Label* label_tail;

@@ -35,6 +61,8 @@ public:
  Instr* instr_tail;

  uint16_t ordinal;

  void AssertNoCycles();
};

@@ -41,6 +41,7 @@ void HIRBuilder::Reset() {
  attributes_ = 0;
  next_label_id_ = 0;
  next_value_ordinal_ = 0;
  locals_.clear();
  block_head_ = block_tail_ = NULL;
  current_block_ = NULL;
#if XE_DEBUG

@@ -50,6 +51,8 @@ void HIRBuilder::Reset() {
}

int HIRBuilder::Finalize() {
  SCOPE_profile_cpu_f("alloy");

  // Scan blocks in order and add fallthrough branches. These are needed for
  // analysis passes to work. We may have also added blocks out of order and
  // need to ensure they fall through in the right order.

@@ -71,7 +74,7 @@ int HIRBuilder::Finalize() {
        // No following block.
        // Sometimes VC++ generates functions with bl at the end even if they
        // will never return. Just add a return to satisfy things.
        XELOGW("Fall-through out of the function.");
        //XELOGW("Fall-through out of the function.");
        Trap();
        Return();
        current_block_ = NULL;

@@ -91,7 +94,7 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
      case INT8_TYPE: str->Append("%X", value->constant.i8); break;
      case INT16_TYPE: str->Append("%X", value->constant.i16); break;
      case INT32_TYPE: str->Append("%X", value->constant.i32); break;
      case INT64_TYPE: str->Append("%X", value->constant.i64); break;
      case INT64_TYPE: str->Append("%llX", value->constant.i64); break;
      case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break;
      case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break;
      case VEC128_TYPE: str->Append("(%F,%F,%F,%F)",

@@ -107,6 +110,9 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) {
    };
    str->Append("v%d.%s", value->ordinal, type_names[value->type]);
  }
  if (value->reg.index != -1) {
    str->Append("<%s%d>", value->reg.set->name, value->reg.index);
  }
}

void HIRBuilder::DumpOp(

@@ -137,10 +143,19 @@ void HIRBuilder::DumpOp(
}

void HIRBuilder::Dump(StringBuffer* str) {
  SCOPE_profile_cpu_f("alloy");

  if (attributes_) {
    str->Append("; attributes = %.8X\n", attributes_);
  }

  for (auto it = locals_.begin(); it != locals_.end(); ++it) {
    auto local = *it;
    str->Append(" ; local ");
    DumpValue(str, local);
    str->Append("\n");
  }

  uint32_t block_ordinal = 0;
  Block* block = block_head_;
  while (block) {

@@ -161,6 +176,39 @@ void HIRBuilder::Dump(StringBuffer* str) {
      label = label->next;
    }

    Edge* incoming_edge = block->incoming_edge_head;
    while (incoming_edge) {
      auto src_label = incoming_edge->src->label_head;
      if (src_label && src_label->name) {
        str->Append(" ; in: %s", src_label->name);
      } else if (src_label) {
        str->Append(" ; in: label%d", src_label->id);
      } else {
        str->Append(" ; in: <block%d>",
                    incoming_edge->src->ordinal);
      }
      str->Append(", dom:%d, uncond:%d\n",
                  (incoming_edge->flags & Edge::DOMINATES) ? 1 : 0,
                  (incoming_edge->flags & Edge::UNCONDITIONAL) ? 1 : 0);
      incoming_edge = incoming_edge->incoming_next;
    }
    Edge* outgoing_edge = block->outgoing_edge_head;
    while (outgoing_edge) {
      auto dest_label = outgoing_edge->dest->label_head;
      if (dest_label && dest_label->name) {
        str->Append(" ; out: %s", dest_label->name);
      } else if (dest_label) {
        str->Append(" ; out: label%d", dest_label->id);
      } else {
        str->Append(" ; out: <block%d>",
                    outgoing_edge->dest->ordinal);
      }
      str->Append(", dom:%d, uncond:%d\n",
                  (outgoing_edge->flags & Edge::DOMINATES) ? 1 : 0,
                  (outgoing_edge->flags & Edge::UNCONDITIONAL) ? 1 : 0);
      outgoing_edge = outgoing_edge->outgoing_next;
    }

    Instr* i = block->instr_head;
    while (i) {
      if (i->opcode->flags & OPCODE_FLAG_HIDE) {

@@ -208,6 +256,29 @@ void HIRBuilder::Dump(StringBuffer* str) {
  }
}

void HIRBuilder::AssertNoCycles() {
  Block* hare = block_head_;
  Block* tortoise = block_head_;
  if (!hare) {
    return;
  }
  while (hare = hare->next) {
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
    }
    hare = hare->next;
    if (hare == tortoise) {
      // Cycle!
      XEASSERTALWAYS();
    }
    tortoise = tortoise->next;
    if (!hare || !tortoise) {
      return;
    }
  }
}

Block* HIRBuilder::current_block() const {
  return current_block_;
}

@@ -303,6 +374,7 @@ void HIRBuilder::InsertLabel(Label* label, Instr* prev_instr) {
    block_tail_ = new_block;
  }
  new_block->label_head = new_block->label_tail = label;
  new_block->incoming_edge_head = new_block->outgoing_edge_head = NULL;
  label->block = new_block;
  label->prev = label->next = NULL;

@@ -319,8 +391,7 @@ void HIRBuilder::InsertLabel(Label* label, Instr* prev_instr) {
    new_block->instr_tail = old_prev_tail;
  }

  for (auto instr = new_block->instr_head; instr != new_block->instr_tail;
       instr = instr->next) {
  for (auto instr = new_block->instr_head; instr; instr = instr->next) {
    instr->block = new_block;
  }

@@ -342,6 +413,19 @@ void HIRBuilder::ResetLabelTags() {
  }
}

void HIRBuilder::AddEdge(Block* src, Block* dest, uint32_t flags) {
  Edge* edge = arena_->Alloc<Edge>();
  edge->src = src;
  edge->dest = dest;
  edge->flags = flags;
  edge->outgoing_prev = NULL;
  edge->outgoing_next = src->outgoing_edge_head;
  src->outgoing_edge_head = edge;
  edge->incoming_prev = NULL;
  edge->incoming_next = dest->incoming_edge_head;
  dest->incoming_edge_head = edge;
}

Block* HIRBuilder::AppendBlock() {
  Block* block = arena_->Alloc<Block>();
  block->arena = arena_;

@@ -356,6 +440,7 @@ Block* HIRBuilder::AppendBlock() {
  }
  current_block_ = block;
  block->label_head = block->label_tail = NULL;
  block->incoming_edge_head = block->outgoing_edge_head = NULL;
  block->instr_head = block->instr_tail = NULL;
  return block;
}

@@ -398,6 +483,7 @@ Instr* HIRBuilder::AppendInstr(
  if (!block->instr_head) {
    block->instr_head = instr;
  }
  instr->ordinal = -1;
  instr->block = block;
  instr->opcode = &opcode_info;
  instr->flags = flags;

@@ -420,8 +506,10 @@ Value* HIRBuilder::AllocValue(TypeName type) {
  value->def = NULL;
  value->use_head = NULL;
  value->last_use = NULL;
  value->local_slot = NULL;
  value->tag = NULL;
  value->reg = -1;
  value->reg.set = NULL;
  value->reg.index = -1;
  return value;
}

@@ -434,8 +522,10 @@ Value* HIRBuilder::CloneValue(Value* source) {
  value->def = NULL;
  value->use_head = NULL;
  value->last_use = NULL;
  value->local_slot = NULL;
  value->tag = NULL;
  value->reg = -1;
  value->reg.set = NULL;
  value->reg.index = -1;
  return value;
}

@@ -557,6 +647,13 @@ void HIRBuilder::CallIndirectTrue(
  EndBlock();
}

void HIRBuilder::CallExtern(FunctionInfo* symbol_info) {
  Instr* i = AppendInstr(OPCODE_CALL_EXTERN_info, 0);
  i->src1.symbol_info = symbol_info;
  i->src2.value = i->src3.value = NULL;
  EndBlock();
}

void HIRBuilder::Return() {
  Instr* i = AppendInstr(OPCODE_RETURN_info, 0);
  i->src1.value = i->src2.value = i->src3.value = NULL;

@@ -578,6 +675,12 @@ void HIRBuilder::ReturnTrue(Value* cond) {
  EndBlock();
}

void HIRBuilder::SetReturnAddress(Value* value) {
  Instr* i = AppendInstr(OPCODE_SET_RETURN_ADDRESS_info, 0);
  i->set_src1(value);
  i->src2.value = i->src3.value = NULL;
}

void HIRBuilder::Branch(Label* label, uint32_t branch_flags) {
  Instr* i = AppendInstr(OPCODE_BRANCH_info, branch_flags);
  i->src1.label = label;

@@ -870,6 +973,28 @@ Value* HIRBuilder::LoadClock() {
  return i->dest;
}

Value* HIRBuilder::AllocLocal(TypeName type) {
  Value* slot = AllocValue(type);
  locals_.push_back(slot);
  return slot;
}

Value* HIRBuilder::LoadLocal(Value* slot) {
  Instr* i = AppendInstr(
      OPCODE_LOAD_LOCAL_info, 0,
      AllocValue(slot->type));
  i->set_src1(slot);
  i->src2.value = i->src3.value = NULL;
  return i->dest;
}

void HIRBuilder::StoreLocal(Value* slot, Value* value) {
  Instr* i = AppendInstr(OPCODE_STORE_LOCAL_info, 0);
  i->set_src1(slot);
  i->set_src2(value);
  i->src3.value = NULL;
}

Value* HIRBuilder::LoadContext(size_t offset, TypeName type) {
  Instr* i = AppendInstr(
      OPCODE_LOAD_CONTEXT_info, 0,

@@ -1631,16 +1756,19 @@ Value* HIRBuilder::Extract(Value* value, Value* index,
                           TypeName target_type) {
  // TODO(benvanik): could do some of this as constants.

  Value* trunc_index = index->type != INT8_TYPE ?
      Truncate(index, INT8_TYPE) : index;

  Instr* i = AppendInstr(
      OPCODE_EXTRACT_info, 0,
      AllocValue(target_type));
  i->set_src1(value);
  i->set_src2(ZeroExtend(index, INT64_TYPE));
  i->set_src2(trunc_index);
  i->src3.value = NULL;
  return i->dest;
}

Value* HIRBuilder::Extract(Value* value, uint64_t index,
Value* HIRBuilder::Extract(Value* value, uint8_t index,
                           TypeName target_type) {
  return Extract(value, LoadConstant(index), target_type);
}

@@ -35,13 +35,19 @@ public:
  virtual int Finalize();

  void Dump(StringBuffer* str);
  void AssertNoCycles();

  Arena* arena() const { return arena_; }

  uint32_t attributes() const { return attributes_; }
  void set_attributes(uint32_t value) { attributes_ = value; }

  std::vector<Value*>& locals() { return locals_; }

  uint32_t max_value_ordinal() const { return next_value_ordinal_; }

  Block* first_block() const { return block_head_; }
  Block* last_block() const { return block_tail_; }
  Block* current_block() const;
  Instr* last_instr() const;

@@ -50,12 +56,11 @@ public:
  void InsertLabel(Label* label, Instr* prev_instr);
  void ResetLabelTags();

  void AddEdge(Block* src, Block* dest, uint32_t flags);

  // static allocations:
  // Value* AllocStatic(size_t length);

  // stack allocations:
  // Value* AllocLocal(TypeName type);

  void Comment(const char* format, ...);

  void Nop();

@@ -74,8 +79,10 @@ public:
                     uint32_t call_flags = 0);
  void CallIndirect(Value* value, uint32_t call_flags = 0);
  void CallIndirectTrue(Value* cond, Value* value, uint32_t call_flags = 0);
  void CallExtern(runtime::FunctionInfo* symbol_info);
  void Return();
  void ReturnTrue(Value* cond);
  void SetReturnAddress(Value* value);

  void Branch(Label* label, uint32_t branch_flags = 0);
  void Branch(Block* block, uint32_t branch_flags = 0);

@@ -115,6 +122,10 @@ public:

  Value* LoadClock();

  Value* AllocLocal(TypeName type);
  Value* LoadLocal(Value* slot);
  void StoreLocal(Value* slot, Value* value);

  Value* LoadContext(size_t offset, TypeName type);
  void StoreContext(size_t offset, Value* value);

@@ -186,7 +197,7 @@ public:
  Value* Insert(Value* value, Value* index, Value* part);
  Value* Insert(Value* value, uint64_t index, Value* part);
  Value* Extract(Value* value, Value* index, TypeName target_type);
  Value* Extract(Value* value, uint64_t index, TypeName target_type);
  Value* Extract(Value* value, uint8_t index, TypeName target_type);
  // i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...)
  // i8/i16/i32 -> vec128
  Value* Splat(Value* value, TypeName target_type);

@@ -229,6 +240,8 @@ protected:
  uint32_t next_label_id_;
  uint32_t next_value_ordinal_;

  std::vector<Value*> locals_;

  Block* block_head_;
  Block* block_tail_;
  Block* current_block_;

@@ -48,17 +48,34 @@ void Instr::set_src3(Value* value) {
  src3_use = value ? value->AddUse(block->arena, this) : NULL;
}

bool Instr::Match(SignatureType dest_req,
                  SignatureType src1_req,
                  SignatureType src2_req,
                  SignatureType src3_req) const {
#define TO_SIG_TYPE(v) \
    (v ? (v->IsConstant() ? SignatureType((v->type + 1) | SIG_TYPE_C) : SignatureType(v->type + 1)) : SIG_TYPE_X)
  return
      ((dest_req == SIG_TYPE_IGNORE) || (dest_req == TO_SIG_TYPE(dest))) &&
      ((src1_req == SIG_TYPE_IGNORE) || (src1_req == TO_SIG_TYPE(src1.value))) &&
      ((src2_req == SIG_TYPE_IGNORE) || (src2_req == TO_SIG_TYPE(src2.value))) &&
      ((src3_req == SIG_TYPE_IGNORE) || (src3_req == TO_SIG_TYPE(src3.value)));
void Instr::MoveBefore(Instr* other) {
  if (next == other) {
    return;
  }

  // Remove from current location.
  if (prev) {
    prev->next = next;
  } else {
    block->instr_head = next;
  }
  if (next) {
    next->prev = prev;
  } else {
    block->instr_tail = prev;
  }

  // Insert into new location.
  block = other->block;
  next = other;
  prev = other->prev;
  other->prev = this;
  if (prev) {
    prev->next = this;
  }
  if (other == block->instr_head) {
    block->instr_head = this;
  }
}

void Instr::Replace(const OpcodeInfo* opcode, uint16_t flags) {

@@ -24,26 +24,6 @@ namespace hir {
class Block;
class Label;

enum SignatureType {
  SIG_TYPE_X    = 0,
  SIG_TYPE_I8   = 1,
  SIG_TYPE_I16  = 2,
  SIG_TYPE_I32  = 3,
  SIG_TYPE_I64  = 4,
  SIG_TYPE_F32  = 5,
  SIG_TYPE_F64  = 6,
  SIG_TYPE_V128 = 7,
  SIG_TYPE_C    = (1 << 3),
  SIG_TYPE_I8C  = SIG_TYPE_C | SIG_TYPE_I8,
  SIG_TYPE_I16C = SIG_TYPE_C | SIG_TYPE_I16,
  SIG_TYPE_I32C = SIG_TYPE_C | SIG_TYPE_I32,
  SIG_TYPE_I64C = SIG_TYPE_C | SIG_TYPE_I64,
  SIG_TYPE_F32C = SIG_TYPE_C | SIG_TYPE_F32,
  SIG_TYPE_F64C = SIG_TYPE_C | SIG_TYPE_F64,
  SIG_TYPE_V128C = SIG_TYPE_C | SIG_TYPE_V128,
  SIG_TYPE_IGNORE = 0xFF,
};

class Instr {
public:
  Block* block;

@@ -52,7 +32,7 @@ public:

  const OpcodeInfo* opcode;
  uint16_t flags;
  uint16_t ordinal;
  uint32_t ordinal;

  typedef union {
    runtime::FunctionInfo* symbol_info;

@@ -74,11 +54,7 @@ public:
  void set_src2(Value* value);
  void set_src3(Value* value);

  bool Match(SignatureType dest = SIG_TYPE_X,
             SignatureType src1 = SIG_TYPE_X,
             SignatureType src2 = SIG_TYPE_X,
             SignatureType src3 = SIG_TYPE_X) const;

  void MoveBefore(Instr* other);
  void Replace(const OpcodeInfo* opcode, uint16_t flags);
  void Remove();
};

@@ -18,7 +18,8 @@ namespace hir {

enum CallFlags {
  CALL_TAIL = (1 << 1),
  CALL_TAIL            = (1 << 1),
  CALL_POSSIBLE_RETURN = (1 << 2),
};
enum BranchFlags {
  BRANCH_LIKELY = (1 << 1),

@@ -94,8 +95,10 @@ enum Opcode {
  OPCODE_CALL_TRUE,
  OPCODE_CALL_INDIRECT,
  OPCODE_CALL_INDIRECT_TRUE,
  OPCODE_CALL_EXTERN,
  OPCODE_RETURN,
  OPCODE_RETURN_TRUE,
  OPCODE_SET_RETURN_ADDRESS,

  OPCODE_BRANCH,
  OPCODE_BRANCH_TRUE,

@@ -116,6 +119,9 @@ enum Opcode {

  OPCODE_LOAD_CLOCK,

  OPCODE_LOAD_LOCAL,
  OPCODE_STORE_LOCAL,

  OPCODE_LOAD_CONTEXT,
  OPCODE_STORE_CONTEXT,

@@ -201,6 +207,7 @@ enum OpcodeFlags {
  OPCODE_FLAG_VOLATILE    = (1 << 4),
  OPCODE_FLAG_IGNORE      = (1 << 5),
  OPCODE_FLAG_HIDE        = (1 << 6),
  OPCODE_FLAG_PAIRED_PREV = (1 << 7),
};

enum OpcodeSignatureType {

@@ -11,566 +11,590 @@
DEFINE_OPCODE(
    OPCODE_COMMENT,
    "comment",
    OPCODE_SIG_X,
    OPCODE_FLAG_IGNORE);
    OPCODE_SIG_X_O,
    OPCODE_FLAG_IGNORE)

DEFINE_OPCODE(
    OPCODE_NOP,
    "nop",
    OPCODE_SIG_X,
    OPCODE_FLAG_IGNORE);
    OPCODE_FLAG_IGNORE)

DEFINE_OPCODE(
    OPCODE_SOURCE_OFFSET,
    "source_offset",
    OPCODE_SIG_X_O,
    OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE);
    OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE)

DEFINE_OPCODE(
    OPCODE_DEBUG_BREAK,
    "debug_break",
    OPCODE_SIG_X,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_DEBUG_BREAK_TRUE,
    "debug_break_true",
    OPCODE_SIG_X_V,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_TRAP,
    "trap",
    OPCODE_SIG_X,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_TRAP_TRUE,
    "trap_true",
    OPCODE_SIG_X_V,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_CALL,
    "call",
    OPCODE_SIG_X_S,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_CALL_TRUE,
    "call_true",
    OPCODE_SIG_X_V_S,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_CALL_INDIRECT,
    "call_indirect",
    OPCODE_SIG_X_V,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_CALL_INDIRECT_TRUE,
    "call_indirect_true",
    OPCODE_SIG_X_V_V,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_CALL_EXTERN,
    "call_extern",
    OPCODE_SIG_X_S,
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_RETURN,
    "return",
    OPCODE_SIG_X,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_RETURN_TRUE,
    "return_true",
    OPCODE_SIG_X_V,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_SET_RETURN_ADDRESS,
    "set_return_address",
    OPCODE_SIG_X_V,
    0)

DEFINE_OPCODE(
    OPCODE_BRANCH,
    "branch",
    OPCODE_SIG_X_L,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_BRANCH_TRUE,
    "branch_true",
    OPCODE_SIG_X_V_L,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_BRANCH_FALSE,
    "branch_false",
    OPCODE_SIG_X_V_L,
    OPCODE_FLAG_BRANCH);
    OPCODE_FLAG_BRANCH)

DEFINE_OPCODE(
    OPCODE_ASSIGN,
    "assign",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_CAST,
    "cast",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ZERO_EXTEND,
    "zero_extend",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SIGN_EXTEND,
    "sign_extend",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_TRUNCATE,
    "truncate",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_CONVERT,
    "convert",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ROUND,
    "round",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_CONVERT_I2F,
    "vector_convert_i2f",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_CONVERT_F2I,
    "vector_convert_f2i",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOAD_VECTOR_SHL,
    "load_vector_shl",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOAD_VECTOR_SHR,
    "load_vector_shr",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOAD_CLOCK,
    "load_clock",
    OPCODE_SIG_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOAD_LOCAL,
    "load_local",
    OPCODE_SIG_V_V,
    0)

DEFINE_OPCODE(
    OPCODE_STORE_LOCAL,
    "store_local",
    OPCODE_SIG_X_V_V,
    0)

DEFINE_OPCODE(
    OPCODE_LOAD_CONTEXT,
    "load_context",
    OPCODE_SIG_V_O,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_STORE_CONTEXT,
    "store_context",
    OPCODE_SIG_X_O_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOAD,
    "load",
    OPCODE_SIG_V_V,
    OPCODE_FLAG_MEMORY);
    OPCODE_FLAG_MEMORY)

DEFINE_OPCODE(
    OPCODE_STORE,
    "store",
    OPCODE_SIG_X_V_V,
    OPCODE_FLAG_MEMORY);
    OPCODE_FLAG_MEMORY)

DEFINE_OPCODE(
    OPCODE_PREFETCH,
    "prefetch",
    OPCODE_SIG_X_V_O,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_MAX,
    "max",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_MIN,
    "min",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SELECT,
    "select",
    OPCODE_SIG_V_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_IS_TRUE,
    "is_true",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_IS_FALSE,
    "is_false",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_COMPARE_EQ,
    "compare_eq",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE(
    OPCODE_COMPARE_NE,
    "compare_ne",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE(
    OPCODE_COMPARE_SLT,
    "compare_slt",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_SLE,
    "compare_sle",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_SGT,
    "compare_sgt",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_SGE,
    "compare_sge",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_ULT,
    "compare_ult",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_ULE,
    "compare_ule",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_UGT,
    "compare_ugt",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_COMPARE_UGE,
    "compare_uge",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_DID_CARRY,
    "did_carry",
    OPCODE_SIG_V_V,
    0);
    OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE(
    OPCODE_DID_OVERFLOW,
    "did_overflow",
    OPCODE_SIG_V_V,
    0);
    OPCODE_FLAG_PAIRED_PREV)
DEFINE_OPCODE(
    OPCODE_DID_SATURATE,
    "did_saturate",
    OPCODE_SIG_V_V,
    0);
    OPCODE_FLAG_PAIRED_PREV)

DEFINE_OPCODE(
    OPCODE_VECTOR_COMPARE_EQ,
    "vector_compare_eq",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)
DEFINE_OPCODE(
    OPCODE_VECTOR_COMPARE_SGT,
    "vector_compare_sgt",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_VECTOR_COMPARE_SGE,
    "vector_compare_sge",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_VECTOR_COMPARE_UGT,
    "vector_compare_ugt",
    OPCODE_SIG_V_V_V,
    0);
    0)
DEFINE_OPCODE(
    OPCODE_VECTOR_COMPARE_UGE,
    "vector_compare_uge",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ADD,
    "add",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_ADD_CARRY,
    "add_carry",
    OPCODE_SIG_V_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_ADD,
    "vector_add",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_SUB,
    "sub",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_MUL,
    "mul",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_MUL_HI,
    "mul_hi",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_DIV,
    "div",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_MUL_ADD,
    "mul_add",
    OPCODE_SIG_V_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_MUL_SUB,
    "mul_sub",
    OPCODE_SIG_V_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_NEG,
    "neg",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ABS,
    "abs",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SQRT,
    "sqrt",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_RSQRT,
    "rsqrt",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_POW2,
    "pow2",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_LOG2,
    "log2",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_DOT_PRODUCT_3,
    "dot_product_3",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_DOT_PRODUCT_4,
    "dot_product_4",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_AND,
    "and",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_OR,
    "or",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_XOR,
    "xor",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_COMMUNATIVE);
    OPCODE_FLAG_COMMUNATIVE)

DEFINE_OPCODE(
    OPCODE_NOT,
    "not",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SHL,
    "shl",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_SHL,
    "vector_shl",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SHR,
    "shr",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_SHR,
    "vector_shr",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SHA,
    "sha",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_VECTOR_SHA,
    "vector_sha",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ROTATE_LEFT,
    "rotate_left",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_BYTE_SWAP,
    "byte_swap",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_CNTLZ,
    "cntlz",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_INSERT,
    "insert",
    OPCODE_SIG_V_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_EXTRACT,
    "extract",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SPLAT,
    "splat",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_PERMUTE,
    "permute",
    OPCODE_SIG_V_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_SWIZZLE,
    "swizzle",
    OPCODE_SIG_V_V_O,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_PACK,
    "pack",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_UNPACK,
    "unpack",
    OPCODE_SIG_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_COMPARE_EXCHANGE,
    "compare_exchange",
    OPCODE_SIG_V_V_V_V,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_ATOMIC_EXCHANGE,
    "atomic_exchange",
    OPCODE_SIG_V_V_V,
    OPCODE_FLAG_VOLATILE);
    OPCODE_FLAG_VOLATILE)

DEFINE_OPCODE(
    OPCODE_ATOMIC_ADD,
    "atomic_add",
    OPCODE_SIG_V_V_V,
    0);
    0)

DEFINE_OPCODE(
    OPCODE_ATOMIC_SUB,
    "atomic_sub",
    OPCODE_SIG_V_V_V,
    0);
    0)

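Dropping the trailing semicolon from every DEFINE_OPCODE entry is the telltale shape of an X-macro include: each including site defines DEFINE_OPCODE to expand an entry however it needs and supplies its own separator. A sketch of the general technique with a hypothetical opcodes_demo.inl (this illustrates the pattern, not Alloy's exact macro):

```cpp
// opcodes_demo.inl holds bare macro calls with no trailing semicolons:
//   DEFINE_OPCODE(OPCODE_ADD, "add", SIG_V_V_V, 0)
//   DEFINE_OPCODE(OPCODE_SUB, "sub", SIG_V_V_V, 0)
// Each includer decides what one entry expands to and adds its own comma.

// First expansion: enum entries.
#define DEFINE_OPCODE(num, name, sig, flags) num,
enum Opcode {
#include "opcodes_demo.inl"
};
#undef DEFINE_OPCODE

// Second expansion: a parallel info table, kept in sync automatically.
struct OpcodeInfo { const char* name; int sig; int flags; };
#define DEFINE_OPCODE(num, name, sig, flags) {name, sig, flags},
static const OpcodeInfo opcode_table[] = {
#include "opcodes_demo.inl"
};
#undef DEFINE_OPCODE
```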
@@ -1,6 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
  'sources': [
    'block.cc',
    'block.h',
    'hir_builder.cc',
    'hir_builder.h',

@@ -187,19 +187,26 @@ void Value::Round(RoundMode round_mode) {
  XEASSERTALWAYS();
}

void Value::Add(Value* other) {
bool Value::Add(Value* other) {
#define CHECK_DID_CARRY(v1, v2) (((uint64_t)v2) > ~((uint64_t)v1))
#define ADD_DID_CARRY(a, b) CHECK_DID_CARRY(a, b)
  XEASSERT(type == other->type);
  bool did_carry = false;
  switch (type) {
  case INT8_TYPE:
    did_carry = ADD_DID_CARRY(constant.i8, other->constant.i8);
    constant.i8 += other->constant.i8;
    break;
  case INT16_TYPE:
    did_carry = ADD_DID_CARRY(constant.i16, other->constant.i16);
    constant.i16 += other->constant.i16;
    break;
  case INT32_TYPE:
    did_carry = ADD_DID_CARRY(constant.i32, other->constant.i32);
    constant.i32 += other->constant.i32;
    break;
  case INT64_TYPE:
    did_carry = ADD_DID_CARRY(constant.i64, other->constant.i64);
    constant.i64 += other->constant.i64;
    break;
  case FLOAT32_TYPE:

@@ -212,21 +219,28 @@ void Value::Add(Value* other) {
    XEASSERTALWAYS();
    break;
  }
  return did_carry;
}

void Value::Sub(Value* other) {
bool Value::Sub(Value* other) {
#define SUB_DID_CARRY(a, b) (b > a)
  XEASSERT(type == other->type);
  bool did_carry = false;
  switch (type) {
  case INT8_TYPE:
    did_carry = SUB_DID_CARRY(constant.i8, other->constant.i8);
    constant.i8 -= other->constant.i8;
    break;
  case INT16_TYPE:
    did_carry = SUB_DID_CARRY(constant.i16, other->constant.i16);
    constant.i16 -= other->constant.i16;
    break;
  case INT32_TYPE:
    did_carry = SUB_DID_CARRY(constant.i32, other->constant.i32);
    constant.i32 -= other->constant.i32;
    break;
  case INT64_TYPE:
    did_carry = SUB_DID_CARRY(constant.i64, other->constant.i64);
    constant.i64 -= other->constant.i64;
    break;
  case FLOAT32_TYPE:

@@ -239,6 +253,7 @@ void Value::Sub(Value* other) {
    XEASSERTALWAYS();
    break;
  }
  return did_carry;
}

void Value::Mul(Value* other) {

@@ -560,6 +575,26 @@ void Value::ByteSwap() {
  }
}

void Value::CountLeadingZeros(const Value* other) {
  switch (other->type) {
  case INT8_TYPE:
    constant.i8 = static_cast<uint8_t>(__lzcnt16(other->constant.i8) - 8);
    break;
  case INT16_TYPE:
    constant.i8 = static_cast<uint8_t>(__lzcnt16(other->constant.i16));
    break;
  case INT32_TYPE:
    constant.i8 = static_cast<uint8_t>(__lzcnt(other->constant.i32));
    break;
  case INT64_TYPE:
    constant.i8 = static_cast<uint8_t>(__lzcnt64(other->constant.i64));
    break;
  default:
    XEASSERTALWAYS();
    break;
  }
}

bool Value::Compare(Opcode opcode, Value* other) {
  // TODO(benvanik): big matrix.
  XEASSERTALWAYS();

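CHECK_DID_CARRY above detects unsigned wraparound without needing a wider type: a + b carries exactly when b exceeds the headroom left above a, i.e. b > ~a. A standalone check of that identity, with an illustrative main:

```cpp
#include <cassert>
#include <cstdint>

// Carry-out of an unsigned 64-bit add: b > ~a is equivalent to saying
// a + b wraps past 2^64 (also equivalent to (a + b) < a after the wrap).
bool AddCarries(uint64_t a, uint64_t b) {
  return b > ~a;
}

int main() {
  assert(!AddCarries(1, 2));                 // plenty of headroom
  assert(AddCarries(~0ull, 1));              // max + 1 wraps to 0
  assert(AddCarries(0x8000000000000000ull,   // MSB + MSB wraps
                    0x8000000000000000ull));
  return 0;
}
```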
@ -11,6 +11,7 @@
|
|||
#define ALLOY_HIR_VALUE_H_
|
||||
|
||||
#include <alloy/core.h>
|
||||
#include <alloy/backend/machine_info.h>
|
||||
#include <alloy/hir/opcodes.h>
|
||||
|
||||
|
||||
|
@ -34,7 +35,32 @@ enum TypeName {
|
|||
};
|
||||
|
||||
static bool IsIntType(TypeName type_name) {
|
||||
return type_name < 4;
|
||||
return type_name <= INT64_TYPE;
|
||||
}
|
||||
static bool IsFloatType(TypeName type_name) {
|
||||
return type_name == FLOAT32_TYPE || type_name == FLOAT64_TYPE;
|
||||
}
|
||||
static bool IsVecType(TypeName type_name) {
|
||||
return type_name == VEC128_TYPE;
|
||||
}
|
||||
static size_t GetTypeSize(TypeName type_name) {
|
||||
switch (type_name) {
|
||||
case INT8_TYPE:
|
||||
return 1;
|
||||
case INT16_TYPE:
|
||||
return 2;
|
||||
case INT32_TYPE:
|
||||
return 4;
|
||||
case INT64_TYPE:
|
||||
return 8;
|
||||
case FLOAT32_TYPE:
|
||||
return 4;
|
||||
case FLOAT64_TYPE:
|
||||
return 8;
|
||||
default:
|
||||
case VEC128_TYPE:
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
||||
enum ValueFlags {
|
||||
|
@ -42,6 +68,10 @@ enum ValueFlags {
|
|||
VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set.
|
||||
};
|
||||
|
||||
struct RegAssignment {
|
||||
const backend::MachineInfo::RegisterSet* set;
|
||||
int32_t index;
|
||||
};
|
||||
|
||||
class Value {
|
||||
public:
|
||||
|
@ -65,13 +95,14 @@ public:
|
|||
TypeName type;
|
||||
|
||||
uint32_t flags;
|
||||
uint32_t reg;
|
||||
RegAssignment reg;
|
||||
ConstantValue constant;
|
||||
|
||||
Instr* def;
|
||||
Use* use_head;
|
||||
// NOTE: for performance reasons this is not maintained during construction.
|
||||
Instr* last_use;
|
||||
Value* local_slot;
|
||||
|
||||
// TODO(benvanik): remove to shrink size.
|
||||
void* tag;
|
||||
|
@ -158,25 +189,26 @@ public:
|
|||
}
|
||||
bool IsConstantTrue() const {
|
||||
if (type == VEC128_TYPE) {
|
||||
return false;
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
return (flags & VALUE_IS_CONSTANT) && !!constant.i64;
|
||||
}
|
||||
bool IsConstantFalse() const {
|
||||
if (type == VEC128_TYPE) {
|
||||
return false;
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
||||
}
|
||||
bool IsConstantZero() const {
|
||||
if (type == VEC128_TYPE) {
|
||||
return false;
|
||||
return (flags & VALUE_IS_CONSTANT) &&
|
||||
!constant.v128.low && !constant.v128.high;
|
||||
}
|
||||
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
||||
}
|
||||
bool IsConstantEQ(Value* other) const {
|
||||
if (type == VEC128_TYPE) {
|
||||
return false;
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
return (flags & VALUE_IS_CONSTANT) &&
|
||||
(other->flags & VALUE_IS_CONSTANT) &&
|
||||
|
@@ -184,12 +216,156 @@ public:
   }
   bool IsConstantNE(Value* other) const {
     if (type == VEC128_TYPE) {
-      return false;
+      XEASSERTALWAYS();
     }
     return (flags & VALUE_IS_CONSTANT) &&
            (other->flags & VALUE_IS_CONSTANT) &&
            constant.i64 != other->constant.i64;
   }
+  bool IsConstantSLT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 < other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 < other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 < other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 < other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 < other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 < other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSLE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 <= other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 <= other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 <= other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 <= other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 <= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 <= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSGT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 > other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 > other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 > other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 > other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 > other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 > other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSGE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 >= other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 >= other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 >= other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 >= other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 >= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 >= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantULT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 < (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 < (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 < (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 < (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 < other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 < other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantULE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 <= (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 <= (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 <= (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 <= (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 <= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 <= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantUGT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 > (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 > (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 > (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 > (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 > other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 > other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantUGE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 >= (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 >= (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 >= (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 >= (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 >= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 >= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }

   uint32_t AsUint32();
   uint64_t AsUint64();

@@ -199,8 +375,8 @@ public:
   void Truncate(TypeName target_type);
   void Convert(TypeName target_type, RoundMode round_mode);
   void Round(RoundMode round_mode);
-  void Add(Value* other);
-  void Sub(Value* other);
+  bool Add(Value* other);
+  bool Sub(Value* other);
   void Mul(Value* other);
   void Div(Value* other);
   static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);

@@ -217,6 +393,7 @@ public:
   void Shr(Value* other);
   void Sha(Value* other);
   void ByteSwap();
   void CountLeadingZeros(const Value* other);
+  bool Compare(Opcode opcode, Value* other);
 };
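The new `IsConstantSLT`/.../`IsConstantUGE` predicates give optimization passes a uniform way to fold compares when both operands are constant. The only difference between the S- and U-variants is the cast to the unsigned type before comparing; a self-contained sketch of why that cast matters (standalone illustration, not code from this diff):

```cpp
#include <cstdint>
#include <cstdio>

// The same bit pattern orders differently under signed and unsigned
// interpretation, which is why IsConstantULT() casts before comparing.
int main() {
  int8_t a = -1;  // bit pattern 0xFF
  int8_t b = 1;   // bit pattern 0x01
  printf("signed:   %d\n", a < b);                    // 1: -1 < 1
  printf("unsigned: %d\n", (uint8_t)a < (uint8_t)b);  // 0: 255 > 1
  return 0;
}
```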
src/alloy/memory.cc
@@ -9,14 +9,22 @@

 #include <alloy/memory.h>

+#if !XE_LIKE_WIN32
+#include <unistd.h>
+#endif
+
 using namespace alloy;


 Memory::Memory() :
-    membase_(0) {
+    membase_(0), reserve_address_(0) {
+#if XE_LIKE_WIN32
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  system_page_size_ = si.dwPageSize;
+#else
+  system_page_size_ = getpagesize();
+#endif
 }

 Memory::~Memory() {
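Caching the OS page size up front (GetSystemInfo on Windows, getpagesize() elsewhere) is typically done so sizes and addresses can be rounded to page granularity later. A minimal sketch of that use, assuming the page size is a power of two (true for the values returned above); RoundUpToPages is a hypothetical helper:

```cpp
// Round an allocation size up to page granularity. Valid only when
// page_size is a power of two.
size_t RoundUpToPages(size_t size, size_t page_size) {
  return (size + page_size - 1) & ~(page_size - 1);
}
```

For example, `RoundUpToPages(100, 4096)` yields 4096.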
src/alloy/memory.h
@@ -34,6 +34,8 @@ public:
   };
+  inline uint32_t* reserve_address() { return &reserve_address_; }
+
   virtual uint64_t page_table() const = 0;

   virtual int Initialize();

   void Zero(uint64_t address, size_t size);

@@ -43,6 +45,15 @@ public:
   uint64_t SearchAligned(uint64_t start, uint64_t end,
                          const uint32_t* values, size_t value_count);

+  virtual uint8_t LoadI8(uint64_t address) = 0;
+  virtual uint16_t LoadI16(uint64_t address) = 0;
+  virtual uint32_t LoadI32(uint64_t address) = 0;
+  virtual uint64_t LoadI64(uint64_t address) = 0;
+  virtual void StoreI8(uint64_t address, uint8_t value) = 0;
+  virtual void StoreI16(uint64_t address, uint16_t value) = 0;
+  virtual void StoreI32(uint64_t address, uint32_t value) = 0;
+  virtual void StoreI64(uint64_t address, uint64_t value) = 0;
+
   virtual uint64_t HeapAlloc(
       uint64_t base_address, size_t size, uint32_t flags,
       uint32_t alignment = 0x20) = 0;
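The new `reserve_address()` slot, paired with the virtual LoadI*/StoreI* accessors, looks like backing state for emulating the PowerPC reservation pair (lwarx/stwcx.): a load-reserved records an address, and the matching conditional store succeeds only while that reservation holds. A sketch of the protocol under those assumptions; the two helper functions are hypothetical, not part of this diff:

```cpp
// Hypothetical reservation protocol built on the Memory interface above.
// Real hardware also clears the reservation on competing stores; this
// sketch only models the address check.
uint32_t LoadReserved(alloy::Memory* memory, uint64_t address) {
  *memory->reserve_address() = (uint32_t)address;  // take the reservation
  return memory->LoadI32(address);
}

bool StoreConditional(alloy::Memory* memory, uint64_t address,
                      uint32_t value) {
  uint32_t* reserve = memory->reserve_address();
  if (*reserve != (uint32_t)address) {
    return false;  // reservation lost or never taken
  }
  *reserve = 0;    // consume the reservation
  memory->StoreI32(address, value);
  return true;
}
```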
src/alloy/runtime/debug_info.cc
@@ -62,7 +62,7 @@ SourceMapEntry* DebugInfo::LookupHIROffset(uint64_t offset) {

 SourceMapEntry* DebugInfo::LookupCodeOffset(uint64_t offset) {
   // TODO(benvanik): binary search? We know the list is sorted by code order.
-  for (int n = source_map_count_ - 1; n >= 0; n--) {
+  for (int64_t n = source_map_count_ - 1; n >= 0; n--) {
     auto entry = &source_map_[n];
     if (entry->code_offset <= offset) {
       return entry;
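Widening the loop variable avoids wraparound when the count exceeds the range of `int`, but the TODO still stands: since the entries are sorted by code offset, the backward linear scan could become a binary search. A sketch with std::upper_bound, assuming source_map_ is a contiguous array sorted ascending by code_offset (names taken from the code above):

```cpp
#include <algorithm>

// Possible binary-search replacement for the linear scan: find the last
// entry whose code_offset is <= the queried offset.
SourceMapEntry* DebugInfo::LookupCodeOffset(uint64_t offset) {
  SourceMapEntry* begin = source_map_;
  SourceMapEntry* end = source_map_ + source_map_count_;
  SourceMapEntry* it = std::upper_bound(
      begin, end, offset,
      [](uint64_t value, const SourceMapEntry& entry) {
        return value < entry.code_offset;
      });
  return it == begin ? NULL : it - 1;
}
```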
src/alloy/runtime/entry_table.cc
@@ -75,6 +75,8 @@ Entry::Status EntryTable::GetOrCreate(uint64_t address, Entry** out_entry) {
 }

 std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) {
+  SCOPE_profile_cpu_f("alloy");
+
   std::vector<Function*> fns;
   LockMutex(lock_);
   for (auto it = map_.begin(); it != map_.end(); ++it) {
src/alloy/runtime/entry_table.h
@@ -47,7 +47,7 @@ public:
 private:
   // TODO(benvanik): replace with a better data structure.
   Mutex* lock_;
-  typedef std::tr1::unordered_map<uint64_t, Entry*> EntryMap;
+  typedef std::unordered_map<uint64_t, Entry*> EntryMap;
   EntryMap map_;
 };
src/alloy/runtime/function.cc
@@ -17,8 +17,9 @@ using namespace alloy;
 using namespace alloy::runtime;


-Function::Function(Type type, uint64_t address) :
-    type_(type), address_(address), debug_info_(0) {
+Function::Function(FunctionInfo* symbol_info) :
+    address_(symbol_info->address()),
+    symbol_info_(symbol_info), debug_info_(0) {
   // TODO(benvanik): create on demand?
   lock_ = AllocMutex();
 }

@@ -72,48 +73,34 @@ Breakpoint* Function::FindBreakpoint(uint64_t address) {
   return result;
 }

-int Function::Call(ThreadState* thread_state) {
+int Function::Call(ThreadState* thread_state, uint64_t return_address) {
   SCOPE_profile_cpu_f("alloy");

   ThreadState* original_thread_state = ThreadState::Get();
   if (original_thread_state != thread_state) {
     ThreadState::Bind(thread_state);
   }
-  int result = CallImpl(thread_state);
+
+  int result = 0;
+
+  if (symbol_info_->behavior() == FunctionInfo::BEHAVIOR_EXTERN) {
+    auto handler = symbol_info_->extern_handler();
+    if (handler) {
+      handler(thread_state->raw_context(),
+              symbol_info_->extern_arg0(),
+              symbol_info_->extern_arg1());
+    } else {
+      XELOGW("undefined extern call to %.8X %s",
+             symbol_info_->address(),
+             symbol_info_->name());
+      result = 1;
+    }
+  } else {
+    CallImpl(thread_state, return_address);
+  }
+
   if (original_thread_state != thread_state) {
     ThreadState::Bind(original_thread_state);
   }
   return result;
 }
-
-ExternFunction::ExternFunction(
-    uint64_t address, Handler handler, void* arg0, void* arg1) :
-    name_(0),
-    handler_(handler), arg0_(arg0), arg1_(arg1),
-    Function(Function::EXTERN_FUNCTION, address) {
-}
-
-ExternFunction::~ExternFunction() {
-  if (name_) {
-    xe_free(name_);
-  }
-}
-
-void ExternFunction::set_name(const char* name) {
-  name_ = xestrdupa(name);
-}
-
-int ExternFunction::CallImpl(ThreadState* thread_state) {
-  if (!handler_) {
-    XELOGW("undefined extern call to %.8X %s", address(), name());
-    return 0;
-  }
-  handler_(thread_state->raw_context(), arg0_, arg1_);
-  return 0;
-}
-
-GuestFunction::GuestFunction(FunctionInfo* symbol_info) :
-    symbol_info_(symbol_info),
-    Function(Function::USER_FUNCTION, symbol_info->address()) {
-}
-
-GuestFunction::~GuestFunction() {
-}
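Call() now saves, rebinds, and restores the calling thread's ThreadState around the dispatch. That save/restore pattern could also be packaged as a scope guard so early returns stay safe; a sketch where ThreadStateScope is hypothetical and only Get()/Bind() come from the code above:

```cpp
// Hypothetical RAII wrapper around the manual bind/restore in Call().
class ThreadStateScope {
public:
  explicit ThreadStateScope(ThreadState* thread_state)
      : original_(ThreadState::Get()), bound_(thread_state) {
    if (original_ != bound_) {
      ThreadState::Bind(bound_);
    }
  }
  ~ThreadStateScope() {
    if (original_ != bound_) {
      ThreadState::Bind(original_);
    }
  }
private:
  ThreadState* original_;
  ThreadState* bound_;
};
```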
src/alloy/runtime/function.h
@@ -24,17 +24,11 @@ class ThreadState;

 class Function {
 public:
-  enum Type {
-    UNKNOWN_FUNCTION = 0,
-    EXTERN_FUNCTION,
-    USER_FUNCTION,
-  };
-public:
-  Function(Type type, uint64_t address);
+  Function(FunctionInfo* symbol_info);
   virtual ~Function();

-  Type type() const { return type_; }
   uint64_t address() const { return address_; }
+  FunctionInfo* symbol_info() const { return symbol_info_; }

   DebugInfo* debug_info() const { return debug_info_; }
   void set_debug_info(DebugInfo* debug_info) { debug_info_ = debug_info; }

@@ -42,17 +36,18 @@ public:
   int AddBreakpoint(Breakpoint* breakpoint);
   int RemoveBreakpoint(Breakpoint* breakpoint);

-  int Call(ThreadState* thread_state);
+  int Call(ThreadState* thread_state, uint64_t return_address);

 protected:
   Breakpoint* FindBreakpoint(uint64_t address);
   virtual int AddBreakpointImpl(Breakpoint* breakpoint) { return 0; }
   virtual int RemoveBreakpointImpl(Breakpoint* breakpoint) { return 0; }
-  virtual int CallImpl(ThreadState* thread_state) = 0;
+  virtual int CallImpl(ThreadState* thread_state,
+                       uint64_t return_address) = 0;

 protected:
-  Type type_;
   uint64_t address_;
+  FunctionInfo* symbol_info_;
   DebugInfo* debug_info_;

   // TODO(benvanik): move elsewhere? DebugData?

@@ -61,43 +56,6 @@ protected:
 };

-
-class ExternFunction : public Function {
-public:
-  typedef void(*Handler)(void* context, void* arg0, void* arg1);
-public:
-  ExternFunction(uint64_t address, Handler handler, void* arg0, void* arg1);
-  virtual ~ExternFunction();
-
-  const char* name() const { return name_; }
-  void set_name(const char* name);
-
-  Handler handler() const { return handler_; }
-  void* arg0() const { return arg0_; }
-  void* arg1() const { return arg1_; }
-
-protected:
-  virtual int CallImpl(ThreadState* thread_state);
-
-protected:
-  char* name_;
-  Handler handler_;
-  void* arg0_;
-  void* arg1_;
-};
-
-
-class GuestFunction : public Function {
-public:
-  GuestFunction(FunctionInfo* symbol_info);
-  virtual ~GuestFunction();
-
-  FunctionInfo* symbol_info() const { return symbol_info_; }
-
-protected:
-  FunctionInfo* symbol_info_;
-};
-

 } // namespace runtime
 } // namespace alloy
src/alloy/runtime/module.cc
@@ -161,6 +161,8 @@ SymbolInfo::Status Module::DefineVariable(VariableInfo* symbol_info) {
 }

 void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
+  SCOPE_profile_cpu_f("alloy");
+
   LockMutex(lock_);
   for (auto it = list_.begin(); it != list_.end(); ++it) {
     SymbolInfo* symbol_info = *it;

@@ -174,6 +176,8 @@ void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {

 void Module::ForEachFunction(size_t since, size_t& version,
                              std::function<void (FunctionInfo*)> callback) {
+  SCOPE_profile_cpu_f("alloy");
+
   LockMutex(lock_);
   size_t count = list_.size();
   version = count;
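Both ForEachFunction overloads take a std::function callback, so callers can pass lambdas directly; a minimal caller sketch, where the module pointer and the counting logic are assumed for illustration:

```cpp
// Hypothetical caller: count how many declared functions have been built.
size_t built_count = 0;
module->ForEachFunction([&](alloy::runtime::FunctionInfo* info) {
  if (info->function()) {
    ++built_count;
  }
});
```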
src/alloy/runtime/module.h
@@ -62,7 +62,7 @@ protected:
 private:
   // TODO(benvanik): replace with a better data structure.
   Mutex* lock_;
-  typedef std::tr1::unordered_map<uint64_t, SymbolInfo*> SymbolMap;
+  typedef std::unordered_map<uint64_t, SymbolInfo*> SymbolMap;
   SymbolMap map_;
   typedef std::vector<SymbolInfo*> SymbolList;
   SymbolList list_;
src/alloy/runtime/register_access.h (deleted)
@@ -1,38 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef ALLOY_RUNTIME_REGISTER_ACCESS_H_
-#define ALLOY_RUNTIME_REGISTER_ACCESS_H_
-
-#include <alloy/core.h>
-
-
-namespace alloy {
-namespace runtime {
-
-typedef bool (*RegisterHandlesCallback)(void* context, uint64_t addr);
-typedef uint64_t (*RegisterReadCallback)(void* context, uint64_t addr);
-typedef void (*RegisterWriteCallback)(void* context, uint64_t addr,
-                                      uint64_t value);
-
-typedef struct RegisterAccessCallbacks_s {
-  void* context;
-  RegisterHandlesCallback handles;
-  RegisterReadCallback read;
-  RegisterWriteCallback write;
-
-  RegisterAccessCallbacks_s* next;
-} RegisterAccessCallbacks;
-
-
-} // namespace runtime
-} // namespace alloy
-
-
-#endif  // ALLOY_RUNTIME_REGISTER_ACCESS_H_
src/alloy/runtime/runtime.cc
@@ -25,8 +25,7 @@ DEFINE_string(runtime_backend, "any",


 Runtime::Runtime(Memory* memory) :
-    memory_(memory), debugger_(0), backend_(0), frontend_(0),
-    access_callbacks_(0) {
+    memory_(memory), debugger_(0), backend_(0), frontend_(0) {
   tracing::Initialize();
   modules_lock_ = AllocMutex(10000);
 }

@@ -41,14 +40,6 @@ Runtime::~Runtime() {
   UnlockMutex(modules_lock_);
   FreeMutex(modules_lock_);

-  RegisterAccessCallbacks* cbs = access_callbacks_;
-  while (cbs) {
-    RegisterAccessCallbacks* next = cbs->next;
-    delete cbs;
-    cbs = next;
-  }
-  access_callbacks_ = NULL;
-
   delete frontend_;
   delete backend_;
   delete debugger_;

@@ -64,11 +55,6 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
   // Must be initialized by subclass before calling into this.
   XEASSERTNOTNULL(memory_);

-  int result = memory_->Initialize();
-  if (result) {
-    return result;
-  }
-
   // Create debugger first. Other types hook up to it.
   debugger_ = new Debugger(this);

@@ -91,10 +77,10 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
 #endif  // ALLOY_HAS_IVM_BACKEND
   if (FLAGS_runtime_backend == "any") {
 #if defined(ALLOY_HAS_X64_BACKEND) && ALLOY_HAS_X64_BACKEND
-    /*if (!backend) {
+    if (!backend) {
       backend = new alloy::backend::x64::X64Backend(
           this);
-    }*/
+    }
 #endif  // ALLOY_HAS_X64_BACKEND
 #if defined(ALLOY_HAS_IVM_BACKEND) && ALLOY_HAS_IVM_BACKEND
     if (!backend) {

@@ -111,7 +97,7 @@ int Runtime::Initialize(Frontend* frontend, Backend* backend) {
   backend_ = backend;
   frontend_ = frontend;

-  result = backend_->Initialize();
+  int result = backend_->Initialize();
   if (result) {
     return result;
   }

@@ -159,6 +145,8 @@ std::vector<Function*> Runtime::FindFunctionsWithAddress(uint64_t address) {
 }

 int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_function = NULL;
   Entry* entry;
   Entry::Status status = entry_table_.GetOrCreate(address, &entry);

@@ -192,6 +180,8 @@ int Runtime::ResolveFunction(uint64_t address, Function** out_function) {

 int Runtime::LookupFunctionInfo(
     uint64_t address, FunctionInfo** out_symbol_info) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_symbol_info = NULL;

   // TODO(benvanik): fast reject invalid addresses/log errors.

@@ -220,6 +210,8 @@ int Runtime::LookupFunctionInfo(

 int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
                                 FunctionInfo** out_symbol_info) {
+  SCOPE_profile_cpu_f("alloy");
+
   // Atomic create/lookup symbol in module.
   // If we get back the NEW flag we must declare it now.
   FunctionInfo* symbol_info = NULL;

@@ -241,6 +233,8 @@ int Runtime::LookupFunctionInfo(Module* module, uint64_t address,

 int Runtime::DemandFunction(
     FunctionInfo* symbol_info, Function** out_function) {
+  SCOPE_profile_cpu_f("alloy");
+
   *out_function = NULL;

   // Lock function for generation. If it's already being generated

@@ -273,11 +267,3 @@ int Runtime::DemandFunction(

   return 0;
 }
-
-void Runtime::AddRegisterAccessCallbacks(
-    const RegisterAccessCallbacks& callbacks) {
-  RegisterAccessCallbacks* cbs = new RegisterAccessCallbacks();
-  xe_copy_struct(cbs, &callbacks, sizeof(callbacks));
-  cbs->next = access_callbacks_;
-  access_callbacks_ = cbs;
-}
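Taken together with the Function::Call change, the typical call path is: look up or demand-compile the function for a guest address with ResolveFunction, then invoke it with an explicit return address. A sketch, where the guest address and the zero return address are purely illustrative:

```cpp
// Hypothetical use of the runtime: demand-compile and invoke the function
// at a guest address. Error handling elided; ResolveFunction returns 0 on
// success per the code above.
alloy::runtime::Function* fn = NULL;
if (!runtime->ResolveFunction(0x82001000, &fn)) {
  fn->Call(thread_state, /*return_address=*/0);
}
```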
src/alloy/runtime/runtime.h
@@ -17,7 +17,6 @@
 #include <alloy/runtime/debugger.h>
 #include <alloy/runtime/entry_table.h>
 #include <alloy/runtime/module.h>
-#include <alloy/runtime/register_access.h>
 #include <alloy/runtime/symbol_info.h>
 #include <alloy/runtime/thread_state.h>

@@ -38,9 +37,6 @@ public:
   Debugger* debugger() const { return debugger_; }
   frontend::Frontend* frontend() const { return frontend_; }
   backend::Backend* backend() const { return backend_; }
-  RegisterAccessCallbacks* access_callbacks() const {
-    return access_callbacks_;
-  }

   int Initialize(frontend::Frontend* frontend, backend::Backend* backend = 0);

@@ -55,9 +51,6 @@ public:
                          FunctionInfo** out_symbol_info);
   int ResolveFunction(uint64_t address, Function** out_function);

-  void AddRegisterAccessCallbacks(
-      const RegisterAccessCallbacks& callbacks);
-
   //uint32_t CreateCallback(void (*callback)(void* data), void* data);

 private:

@@ -74,8 +67,6 @@ protected:
   EntryTable entry_table_;
   Mutex* modules_lock_;
   ModuleList modules_;
-
-  RegisterAccessCallbacks* access_callbacks_;
 };
src/alloy/runtime/sources.gypi
@@ -15,7 +15,6 @@
     'module.h',
     'raw_module.cc',
     'raw_module.h',
-    'register_access.h',
     'runtime.cc',
     'runtime.h',
     'symbol_info.cc',
src/alloy/runtime/symbol_info.cc
@@ -34,11 +34,19 @@ void SymbolInfo::set_name(const char* name) {
 FunctionInfo::FunctionInfo(Module* module, uint64_t address) :
     end_address_(0), behavior_(BEHAVIOR_DEFAULT), function_(0),
     SymbolInfo(SymbolInfo::TYPE_FUNCTION, module, address) {
+  xe_zero_struct(&extern_info_, sizeof(extern_info_));
 }

 FunctionInfo::~FunctionInfo() {
 }

+void FunctionInfo::SetupExtern(ExternHandler handler, void* arg0, void* arg1) {
+  behavior_ = BEHAVIOR_EXTERN;
+  extern_info_.handler = handler;
+  extern_info_.arg0 = arg0;
+  extern_info_.arg1 = arg1;
+}
+
 VariableInfo::VariableInfo(Module* module, uint64_t address) :
     SymbolInfo(SymbolInfo::TYPE_VARIABLE, module, address) {
 }
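With externs now modeled as a behavior on FunctionInfo rather than a dedicated Function subclass, hooking a guest import reduces to one SetupExtern call; a sketch in which the handler name and its arguments are hypothetical:

```cpp
// Hypothetical host-side handler matching FunctionInfo::ExternHandler.
void HandleSomeGuestImport(void* context, void* arg0, void* arg1) {
  // ... host implementation of the guest import ...
}

// During module setup, given a resolved FunctionInfo* symbol_info:
symbol_info->SetupExtern(HandleSomeGuestImport, NULL, NULL);
// Function::Call now routes here instead of into generated code.
```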
src/alloy/runtime/symbol_info.h
@@ -63,6 +63,7 @@ public:
     BEHAVIOR_PROLOG,
     BEHAVIOR_EPILOG,
     BEHAVIOR_EPILOG_RETURN,
+    BEHAVIOR_EXTERN,
   };

 public:

@@ -79,10 +80,21 @@ public:
   Function* function() const { return function_; }
   void set_function(Function* value) { function_ = value; }

+  typedef void(*ExternHandler)(void* context, void* arg0, void* arg1);
+  void SetupExtern(ExternHandler handler, void* arg0, void* arg1);
+  ExternHandler extern_handler() const { return extern_info_.handler; }
+  void* extern_arg0() const { return extern_info_.arg0; }
+  void* extern_arg1() const { return extern_info_.arg1; }
+
 private:
   uint64_t end_address_;
   Behavior behavior_;
   Function* function_;
+  struct {
+    ExternHandler handler;
+    void* arg0;
+    void* arg1;
+  } extern_info_;
 };

 class VariableInfo : public SymbolInfo {
src/alloy/runtime/thread_state.cc
@@ -64,6 +64,5 @@ ThreadState* ThreadState::Get() {
 }

 uint32_t ThreadState::GetThreadID() {
-  XEASSERT(thread_state_);
   return thread_state_->thread_id_;
 }
src/alloy/runtime/tracing.h
@@ -40,46 +40,46 @@ public:
     ALLOY_RUNTIME_MEMORY_HEAP_FREE = ALLOY_RUNTIME_MEMORY | (4),
   };

-  typedef struct {
+  typedef struct Init_s {
     static const uint32_t event_type = ALLOY_RUNTIME_INIT;
   } Init;
-  typedef struct {
+  typedef struct Deinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_DEINIT;
   } Deinit;

-  typedef struct {
+  typedef struct ThreadInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_THREAD_INIT;
   } ThreadInit;
-  typedef struct {
+  typedef struct ThreadDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_THREAD_DEINIT;
   } ThreadDeinit;

-  typedef struct {
+  typedef struct MemoryInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_INIT;
     // map of memory, etc?
   } MemoryInit;
-  typedef struct {
+  typedef struct MemoryDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_DEINIT;
   } MemoryDeinit;
-  typedef struct {
+  typedef struct MemoryHeapInit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_INIT;
     uint32_t heap_id;
     uint64_t low_address;
     uint64_t high_address;
     uint32_t is_physical;
   } MemoryHeapInit;
-  typedef struct {
+  typedef struct MemoryHeapDeinit_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_DEINIT;
     uint32_t heap_id;
   } MemoryHeapDeinit;
-  typedef struct {
+  typedef struct MemoryHeapAlloc_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_ALLOC;
     uint32_t heap_id;
     uint32_t flags;
     uint64_t address;
     size_t size;
   } MemoryHeapAlloc;
-  typedef struct {
+  typedef struct MemoryHeapFree_s {
     static const uint32_t event_type = ALLOY_RUNTIME_MEMORY_HEAP_FREE;
     uint32_t heap_id;
     uint64_t address;
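Naming the event structs (Init_s, Deinit_s, and so on) keeps them friendlier to debuggers and forward declarations, while each still carries a static event_type tag. That tag is what makes a small template-based trace writer possible; a sketch, where WriteEvent and the buffer-append call are assumptions, not code from this diff:

```cpp
// Hypothetical trace sink keyed off the static event_type tag that every
// event struct above carries.
template <typename T>
void WriteEvent(const T& ev) {
  AppendToTraceBuffer(T::event_type, &ev, sizeof(T));  // assumed sink API
}

// Usage (scope qualifiers on the event struct elided; values illustrative):
MemoryHeapAlloc ev = {/*heap_id=*/1, /*flags=*/0,
                      /*address=*/0x80000000, /*size=*/4096};
WriteEvent(ev);
```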