x64 emitter first pass

This commit is contained in:
Anthony Pesch 2015-07-24 15:14:16 -07:00
parent 0cbb175853
commit 4beeeb52f5
16 changed files with 1470 additions and 149 deletions

View File

@ -52,6 +52,7 @@ set(DREAVM_SOURCES
src/cpu/backend/interpreter/interpreter_callbacks.cc
src/cpu/backend/x64/x64_backend.cc
src/cpu/backend/x64/x64_block.cc
src/cpu/backend/x64/x64_emitter.cc
src/cpu/frontend/sh4/sh4_builder.cc
src/cpu/frontend/sh4/sh4_emit.cc
src/cpu/frontend/sh4/sh4_frontend.cc
@ -268,6 +269,7 @@ set(DREAVM_TEST_SOURCES
src/cpu/backend/interpreter/interpreter_callbacks.cc
src/cpu/backend/x64/x64_backend.cc
src/cpu/backend/x64/x64_block.cc
src/cpu/backend/x64/x64_emitter.cc
src/cpu/frontend/sh4/sh4_builder.cc
src/cpu/frontend/sh4/sh4_emit.cc
src/cpu/frontend/sh4/sh4_frontend.cc

View File

@ -8,7 +8,6 @@ template <typename T>
// Rounds v up to the next multiple of alignment.
// The `& -alignment` mask only clears the low bits correctly when alignment
// is a power of two, so callers must pass a power-of-two alignment.
T align(T v, T alignment) {
return (v + alignment - 1) & -alignment;
}
}
}

View File

@ -1,46 +1,28 @@
#include "core/core.h"
#include "cpu/backend/x64/x64_backend.h"
#include "cpu/backend/x64/x64_block.h"
using namespace dreavm::core;
using namespace dreavm::cpu;
using namespace dreavm::cpu::backend;
using namespace dreavm::cpu::backend::x64;
using namespace dreavm::cpu::ir;
using namespace dreavm::emu;
// Register table: each entry pairs a register name with the mask of value
// types it may hold. Lists the sixteen 64-bit general-purpose registers
// (VALUE_INT_MASK) followed by mm0-mm7 (VALUE_FLOAT_MASK).
// NOTE(review): mm0-mm7 are the MMX register names rather than the SSE
// xmm registers; confirm which was intended for floating point values.
static Register x64_registers[] = {{"rax", VALUE_INT_MASK},
{"rbx", VALUE_INT_MASK},
{"rcx", VALUE_INT_MASK},
{"rdx", VALUE_INT_MASK},
{"rsi", VALUE_INT_MASK},
{"rdi", VALUE_INT_MASK},
{"rbp", VALUE_INT_MASK},
{"rsp", VALUE_INT_MASK},
{"r8", VALUE_INT_MASK},
{"r9", VALUE_INT_MASK},
{"r10", VALUE_INT_MASK},
{"r11", VALUE_INT_MASK},
{"r12", VALUE_INT_MASK},
{"r13", VALUE_INT_MASK},
{"r14", VALUE_INT_MASK},
{"r15", VALUE_INT_MASK},
{"mm0", VALUE_FLOAT_MASK},
{"mm1", VALUE_FLOAT_MASK},
{"mm2", VALUE_FLOAT_MASK},
{"mm3", VALUE_FLOAT_MASK},
{"mm4", VALUE_FLOAT_MASK},
{"mm5", VALUE_FLOAT_MASK},
{"mm6", VALUE_FLOAT_MASK},
{"mm7", VALUE_FLOAT_MASK}};
// Register table: each entry pairs a register name with the mask of value
// types it may hold. Integer values get rbx, rbp and r12-r15; floating point
// values get xmm2-xmm7.
// NOTE(review): the integer entries are the callee-saved general-purpose
// registers of the System V AMD64 ABI; presumably the remaining registers are
// kept back as argument / scratch registers for the emitter - confirm against
// x64_emitter.cc.
static Register x64_registers[] = {
{"rbx", VALUE_INT_MASK}, {"rbp", VALUE_INT_MASK},
{"r12", VALUE_INT_MASK}, {"r13", VALUE_INT_MASK},
{"r14", VALUE_INT_MASK}, {"r15", VALUE_INT_MASK},
{"xmm2", VALUE_FLOAT_MASK}, {"xmm3", VALUE_FLOAT_MASK},
{"xmm4", VALUE_FLOAT_MASK}, {"xmm5", VALUE_FLOAT_MASK},
{"xmm6", VALUE_FLOAT_MASK}, {"xmm7", VALUE_FLOAT_MASK}};
// Lookup table of Xbyak register objects: the sixteen 64-bit general-purpose
// registers followed by mm0-mm7. AssembleBlock indexes it with the register
// number stored on an IR value (Value::reg()).
static const Xbyak::Reg *reg_map[] = {
&Xbyak::util::rax, &Xbyak::util::rbx, &Xbyak::util::rcx, &Xbyak::util::rdx,
&Xbyak::util::rsi, &Xbyak::util::rdi, &Xbyak::util::rbp, &Xbyak::util::rsp,
&Xbyak::util::r8, &Xbyak::util::r9, &Xbyak::util::r10, &Xbyak::util::r11,
&Xbyak::util::r12, &Xbyak::util::r13, &Xbyak::util::r14, &Xbyak::util::r15,
&Xbyak::util::mm0, &Xbyak::util::mm1, &Xbyak::util::mm2, &Xbyak::util::mm3,
&Xbyak::util::mm4, &Xbyak::util::mm5, &Xbyak::util::mm6, &Xbyak::util::mm7};
X64Backend::X64Backend(emu::Memory &memory) : Backend(memory) {}
// Constructs the backend with a fixed-size code buffer and an emitter that
// writes into it.
X64Backend::X64Backend(emu::Memory &memory)
: Backend(memory),
// TODO: a single 32 MiB code buffer is allocated up front for now. It needs
// real management soon (e.g. reclaiming space when blocks are freed).
codegen_(1024 * 1024 * 32),
// emitter_ is handed a reference to codegen_, so codegen_ has to be
// initialized before it
emitter_(codegen_) {}
// Nothing to release explicitly; members are destroyed by their own
// destructors.
X64Backend::~X64Backend() {}
@ -53,36 +35,12 @@ int X64Backend::num_registers() const {
// No initialization work is performed here; always reports success.
bool X64Backend::Init() { return true; }
// Assembles the IR held by builder into native code and returns it wrapped in
// an X64Block together with the block's guest cycle count.
std::unique_ptr<RuntimeBlock> X64Backend::AssembleBlock(IRBuilder &builder) {
int guest_cycles = 0;
// code generation for the whole IR is delegated to the emitter
X64Fn fn = emitter_.Emit(builder);
// 0. LOAD_CONTEXT 40 %0
// 1. LOAD_CONTEXT 36 %1
// 2. ADD %0 %1 %2 <--- ideally %0 and %2 should re-use the same register
// 3. STORE_CONTEXT 40 %2
// 4. LOAD_CONTEXT 16 %3
// 5. BRANCH %3
// get number of guest cycles for this block of code
const Value *md_guest_cycles = builder.GetMetadata(MD_GUEST_CYCLES);
// the MD_GUEST_CYCLES metadata is required; CHECK enforces its presence
CHECK(md_guest_cycles);
int guest_cycles = md_guest_cycles->value<int32_t>();
// RuntimeContext * is at RCX on Windows, RDI on OSX
for (auto block : builder.blocks()) {
for (auto instr : block->instrs()) {
if (instr->op() == OP_LOAD_CONTEXT) {
// only context offsets 40 and 36 are handled; other offsets emit nothing
if (instr->arg0()->value<int32_t>() == 40) {
gen_.mov(*reg_map[instr->result()->reg()], gen_.dword[gen_.rdi + 40]);
} else if (instr->arg0()->value<int32_t>() == 36) {
gen_.mov(*reg_map[instr->result()->reg()], gen_.dword[gen_.rdi + 36]);
}
} else if (instr->op() == OP_ADD) {
// two-operand add: the sum is written to arg0's register
gen_.add(*reg_map[instr->arg0()->reg()],
*reg_map[instr->arg1()->reg()]);
} else if (instr->op() == OP_STORE_CONTEXT) {
// the store offset is hard-coded to 40 rather than read from arg0
gen_.mov(gen_.dword[gen_.rdi + 40], *reg_map[instr->arg1()->reg()]);
} else if (instr->op() == OP_BRANCH) {
gen_.ret();
}
}
}
X64Fn fn = gen_.getCode<X64Fn>();
return std::unique_ptr<RuntimeBlock>(new X64Block(guest_cycles, fn));
}

View File

@ -3,6 +3,7 @@
#include <xbyak/xbyak.h>
#include "cpu/backend/backend.h"
#include "cpu/backend/x64/x64_emitter.h"
#include "cpu/runtime.h"
namespace dreavm {
@ -22,7 +23,8 @@ class X64Backend : public Backend {
std::unique_ptr<RuntimeBlock> AssembleBlock(ir::IRBuilder &builder);
private:
Xbyak::CodeGenerator gen_;
Xbyak::CodeGenerator codegen_;
X64Emitter emitter_;
};
}
}

View File

@ -1,3 +1,5 @@
#include <iomanip>
#include <beaengine/BeaEngine.h>
#include "cpu/backend/x64/x64_backend.h"
#include "cpu/backend/x64/x64_block.h"
#include "emu/profiler.h"
@ -12,6 +14,32 @@ X64Block::X64Block(int guest_cycles, X64Fn fn)
// Empty destructor: nothing is released here.
// NOTE(review): the generated code behind fn_ is not freed by this class;
// confirm who owns the code buffer.
X64Block::~X64Block() {}
// Runs the compiled native code for this block.
uint32_t X64Block::Call(emu::Memory *memory, void *guest_ctx) {
fn_(guest_ctx);
return 0xdeadbeef;
// note the argument order: X64Fn takes (guest_ctx, memory), the reverse of
// Call()'s own parameter order; the function's result is returned unchanged
return fn_(guest_ctx, memory);
}
// Logs a disassembly of this block's native code, one instruction per line.
// Stops after the first return instruction, or as soon as the disassembler
// reports that it cannot decode any further.
void X64Block::Dump() {
  // BeaEngine expects the whole DISASM structure to be zeroed before the
  // first call; fields that are not set below (Options, VirtualAddr, ...)
  // would otherwise be read uninitialized
  DISASM dsm = {};
  const uintptr_t start = reinterpret_cast<uintptr_t>(fn_);
  dsm.Archi = 64;
  dsm.EIP = start;
  dsm.SecurityBlock = 0;

  while (true) {
    int len = Disasm(&dsm);

    if (len == OUT_OF_BLOCK) {
      LOG(INFO) << "Disasm engine is not allowed to read more memory";
      break;
    } else if (len == UNKNOWN_OPCODE) {
      LOG(INFO) << "Unknown opcode";
      break;
    }

    // prefix each instruction with its offset from the start of the block;
    // VirtualAddr is an input field that is never assigned here, so printing
    // it carried no information
    LOG(INFO) << std::setw(2) << std::hex << std::setfill('0')
              << static_cast<int>(dsm.EIP - start) << " " << dsm.CompleteInstr;

    if (dsm.Instruction.BranchType == RetType) {
      break;
    }

    // advance to the next instruction
    dsm.EIP = dsm.EIP + len;
  }
}

View File

@ -4,13 +4,14 @@
#include <memory>
#include "cpu/ir/ir_builder.h"
#include "cpu/runtime.h"
#include "emu/memory.h"
namespace dreavm {
namespace cpu {
namespace backend {
namespace x64 {
typedef void (*X64Fn)(void *guest_ctx);
// Entry point signature of a compiled block: takes the guest context and the
// memory interface, and returns a 32-bit value that X64Block::Call hands
// straight back to its caller.
typedef uint32_t (*X64Fn)(void *guest_ctx, emu::Memory *memory);
class X64Block : public RuntimeBlock {
public:
@ -18,6 +19,7 @@ class X64Block : public RuntimeBlock {
~X64Block();
uint32_t Call(emu::Memory *memory, void *guest_ctx);
void Dump();
private:
X64Fn fn_;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
#ifndef X64_EMITTER_H
#define X64_EMITTER_H
#include <memory>
#include <xbyak/xbyak.h>
#include "core/arena.h"
#include "cpu/backend/x64/x64_block.h"
#include "cpu/runtime.h"
namespace dreavm {
namespace cpu {
namespace backend {
namespace x64 {
// Offsets of the emitter's fixed stack slots: guest context at 0, memory at 8
// and locals starting at 16.
// NOTE(review): presumably byte offsets from the stack pointer inside emitted
// code, with the first two slots holding the X64Fn arguments - confirm
// against x64_emitter.cc.
enum {
STACK_OFFSET_GUEST_CONTEXT = 0,
STACK_OFFSET_MEMORY = 8,
STACK_OFFSET_LOCALS = 16
};
// Translates an IR block into x64 machine code using an Xbyak code generator
// supplied by the caller.
class X64Emitter {
public:
// codegen is taken by reference and the class stores a reference member
// (c_), so the generator must outlive the emitter.
X64Emitter(Xbyak::CodeGenerator &codegen);
// Emits code for the IR in builder and returns the resulting function
// pointer.
X64Fn Emit(ir::IRBuilder &builder);
// helpers for the emitter callbacks
// NOTE(review): the operand / register getters return references;
// presumably the referenced objects live in operand_arena_ or are the tmp
// argument passed in - confirm lifetimes in x64_emitter.cc.
const Xbyak::Operand &GetOperand(const ir::Value *v, int size = -1);
const Xbyak::Operand &GetOperand(const ir::Value *v,
const Xbyak::Operand &tmp);
const Xbyak::Reg &GetRegister(const ir::Value *v, const Xbyak::Reg &tmp);
const Xbyak::Xmm &GetXMMRegister(const ir::Value *v, const Xbyak::Xmm &tmp);
const Xbyak::Xmm &GetXMMRegister(const ir::Value *v,
const Xbyak::Operand &prefered,
const Xbyak::Xmm &tmp);
void CopyOperand(const Xbyak::Operand &from, const Xbyak::Operand &to);
void CopyOperand(const ir::Value *v, const Xbyak::Operand &to);
bool CanEncodeAsImmediate(const ir::Value *v);
private:
// code generator that receives the emitted instructions (not owned)
Xbyak::CodeGenerator &c_;
// arena owned by the emitter
// NOTE(review): presumably backs temporary Xbyak operands - confirm
core::Arena operand_arena_;
};
}
}
}
}
#endif

View File

@ -11,31 +11,6 @@ const char *dreavm::cpu::ir::Opnames[NUM_OPCODES] = {
#include "cpu/ir/ir_ops.inc"
};
// Returns true when type is one of the floating point value types
// (VALUE_F32 or VALUE_F64), false for everything else.
static inline bool IsFloatType(ValueTy type) {
  switch (type) {
    case VALUE_F32:
    case VALUE_F64:
      return true;
    default:
      return false;
  }
}
// Every value type that is not a floating point type counts as an integer
// type.
static inline bool IsIntType(ValueTy type) {
  const bool is_float = IsFloatType(type);
  return !is_float;
}
// Returns the storage size, in bytes, of a value of the given type.
// Returns 0 for a type that is not listed below.
static inline int SizeForType(ValueTy type) {
  // no default label on purpose, so the compiler can still warn when a new
  // ValueTy enumerator is added without a case here
  switch (type) {
    case VALUE_I8:
      return 1;
    case VALUE_I16:
      return 2;
    case VALUE_I32:
      return 4;
    case VALUE_I64:
      return 8;
    case VALUE_F32:
      return 4;
    case VALUE_F64:
      return 8;
    case VALUE_BLOCK:
      return 4;
  }
  // flowing off the end of a non-void function is undefined behavior; give
  // out-of-range values a defined result instead
  return 0;
}
//
// Value
//
@ -175,7 +150,7 @@ void IRBuilder::Dump() const {
auto res = value_vars.insert(std::make_pair((intptr_t)v, name));
it = res.first;
}
ss << it->second;
ss << it->second << " (" << v->reg() << ")";
};
auto DumpValue = [&](std::stringstream &ss, const Value *v) {
if (!v) {
@ -324,6 +299,9 @@ void IRBuilder::Store(Value *addr, Value *v) {
}
Value *IRBuilder::Cast(Value *v, ValueTy dest_type) {
CHECK((IsIntType(v->type()) && IsFloatType(dest_type)) ||
(IsFloatType(v->type()) && IsIntType(dest_type)));
Instr *instr = AppendInstr(OP_CAST);
Value *result = AllocDynamic(dest_type);
instr->set_arg0(v);

View File

@ -87,6 +87,31 @@ class Block;
class Instr;
class ValueRef;
// Returns true when type is one of the floating point value types
// (VALUE_F32 or VALUE_F64), false for everything else.
static inline bool IsFloatType(ValueTy type) {
  switch (type) {
    case VALUE_F32:
    case VALUE_F64:
      return true;
    default:
      return false;
  }
}
// Every value type that is not a floating point type counts as an integer
// type.
static inline bool IsIntType(ValueTy type) {
  const bool is_float = IsFloatType(type);
  return !is_float;
}
// Returns the storage size, in bytes, of a value of the given type.
// Returns 0 for a type that is not listed below.
static inline int SizeForType(ValueTy type) {
  // no default label on purpose, so the compiler can still warn when a new
  // ValueTy enumerator is added without a case here
  switch (type) {
    case VALUE_I8:
      return 1;
    case VALUE_I16:
      return 2;
    case VALUE_I32:
      return 4;
    case VALUE_I64:
      return 8;
    case VALUE_F32:
      return 4;
    case VALUE_F64:
      return 8;
    case VALUE_BLOCK:
      return 4;
  }
  // flowing off the end of a non-void function is undefined behavior; give
  // out-of-range values a defined result instead
  return 0;
}
class Value {
public:
Value(ValueTy ty);

View File

@ -2,6 +2,7 @@
#include "cpu/ir/passes/register_allocation_pass.h"
using namespace dreavm;
using namespace dreavm::cpu::backend;
using namespace dreavm::cpu::ir;
using namespace dreavm::cpu::ir::passes;
@ -33,7 +34,7 @@ void RegisterAllocationPass::Run(IRBuilder &builder) {
Value *result = instr->result();
// only allocate registers for results, assume constants can always be
// encoded by immediates or that the backend has registers reserved
// encoded as immediates or that the backend has registers reserved
// for storing the constants
if (!result) {
continue;
@ -47,36 +48,16 @@ void RegisterAllocationPass::Run(IRBuilder &builder) {
// expire any old intervals, freeing up the registers they claimed
ExpireOldIntervals(start);
// if the last argument isn't used after this instruction, its register
// can be reused to take advantage of many architectures supporting
// operations where the destination is the last source argument
// FIXME could reorder arguments and do this with any source arguments
// meeting the criteria
Value *last_arg = instr->arg2()
? instr->arg2()
: (instr->arg1() ? instr->arg1() : instr->arg0());
if (last_arg && !last_arg->constant()) {
// get the current interval for this register
int last_reg = last_arg->reg();
if (last_reg != NO_REGISTER) {
const std::multiset<Interval>::iterator &it = live_[last_reg];
// if the argument isn't used after this instruction, reuse its
// register for the result
if (GetOrdinal(it->end) <= GetOrdinal(start)) {
UpdateInterval(it, result, start, end);
result->set_reg(last_reg);
continue;
}
}
}
// else, allocate a new register
int reg = AllocFreeRegister(result, start, end);
// first, try and reuse the register of one of the incoming arguments
int reg = ReuuseArgRegister(instr, start, end);
if (reg == NO_REGISTER) {
reg = AllocBlockedRegister(builder, result, start, end);
CHECK_NE(reg, NO_REGISTER);
// else, allocate a new register for the result
reg = AllocFreeRegister(result, start, end);
if (reg == NO_REGISTER) {
// if a register couldn't be allocated, spill a register and try again
reg = AllocBlockedRegister(builder, result, start, end);
CHECK_NE(reg, NO_REGISTER);
}
}
result->set_reg(reg);
@ -143,9 +124,6 @@ void RegisterAllocationPass::UpdateInterval(
Instr *end) {
int reg = it->reg;
// printf("UpdateRegister %d (%p) -> %d (%p) : (%p)\n", GetOrdinal(start),
// start, GetOrdinal(end), end, value);
// remove the old interval
intervals_.erase(it);
@ -160,19 +138,60 @@ void RegisterAllocationPass::UpdateInterval(
live_[reg] = intervals_.insert(interval);
}
int RegisterAllocationPass::AllocFreeRegister(Value *value, Instr *start,
// If the first argument isn't used after this instruction, its register
// can be reused to take advantage of many architectures supporting
// operations where the destination is the first argument.
// Returns the reused register, or NO_REGISTER when arg0 is missing, constant,
// has no register, its register can't hold the result type, or the register
// is still live past start.
// TODO could reorder arguments for commutative binary ops and do this
// with the second argument as well
// NOTE(review): the function name is misspelled ("Reuuse"); renaming it also
// requires updating the declaration in the header and the call site in Run().
int RegisterAllocationPass::ReuuseArgRegister(Instr *instr, Instr *start,
Instr *end) {
if (!num_free_) {
// LOG(WARNING) << "AllocFreeRegister failed for " << GetOrdinal(start);
if (!instr->arg0() || instr->arg0()->constant()) {
return NO_REGISTER;
}
// printf("AllocFreeRegister %d (%p) -> %d (%p) : (%p)\n", GetOrdinal(start),
// start, GetOrdinal(end), end, value);
// despite its name, last_reg holds the register of the first argument
int last_reg = instr->arg0()->reg();
if (last_reg == NO_REGISTER) {
return NO_REGISTER;
}
// make sure the register can hold the result type
// (value_types is a bitmask with one bit per value type)
const Register &r = registers_[last_reg];
if (!(r.value_types & 1 << (instr->result()->type()))) {
return NO_REGISTER;
}
// if the argument's register is used after this instruction, it can't be
// reused
const std::multiset<Interval>::iterator &it = live_[last_reg];
if (GetOrdinal(it->end) > GetOrdinal(start)) {
return NO_REGISTER;
}
// the argument's register isn't used afterwards, update its interval and
// reuse
UpdateInterval(it, instr->result(), start, end);
return last_reg;
}
int RegisterAllocationPass::AllocFreeRegister(Value *value, Instr *start,
Instr *end) {
// find the first free register that can store this value type
// TODO split up free queue into int / float to avoid this scan
int i;
for (i = 0; i < num_free_; i++) {
const Register &r = registers_[free_[i]];
if (r.value_types & 1 << (value->type())) {
break;
}
}
if (i == num_free_) {
return NO_REGISTER;
}
// remove register from free queue
int reg = free_[0];
free_[0] = free_[--num_free_];
int reg = free_[i];
free_[i] = free_[--num_free_];
// add interval
Interval interval;
@ -191,24 +210,31 @@ int RegisterAllocationPass::AllocFreeRegister(Value *value, Instr *start,
// Frees up a register for value by spilling a currently allocated value to a
// local slot, then allocates from the free list again. Returns the result of
// AllocFreeRegister for value.
int RegisterAllocationPass::AllocBlockedRegister(IRBuilder &builder,
Value *value, Instr *start,
Instr *end) {
CHECK_EQ(num_free_, 0);
CHECK_EQ(num_registers_, (int)intervals_.size());
// TODO no longer valid due to type masks
// CHECK_EQ(num_free_, 0);
// CHECK_EQ(num_registers_, (int)intervals_.size());
// spill the register that ends furthest away that can store this type
// (intervals_ is walked from its last element backwards)
auto it = intervals_.rbegin();
auto e = intervals_.rend();
for (; it != e; ++it) {
const Register &r = registers_[it->reg];
if (r.value_types & 1 << (value->type())) {
break;
}
}
// at least one live interval must use a register compatible with this type
CHECK(it != e);
// spill the register that ends furthest away, or possibly this register
// itself
auto it = --intervals_.end();
const Interval &to_spill = *it;
// point spilled value to use stack
to_spill.value->set_reg(NO_REGISTER);
to_spill.value->set_local(builder.AllocLocal(to_spill.value->type()));
// printf("Spilling %d (%p) -> %d (%p) : (%p)\n", GetOrdinal(to_spill.start),
// to_spill.start, GetOrdinal(to_spill.end), to_spill.end, to_spill.value);
// remove interval
// to_spill refers to the element being erased, so its register is pushed
// onto the free list before the erase
free_[num_free_++] = to_spill.reg;
intervals_.erase(it);
// a reverse iterator's base() refers to the element after the one it
// designates, so step back once to get the forward iterator to erase
intervals_.erase(--it.base());
return AllocFreeRegister(value, start, end);
}

View File

@ -52,6 +52,7 @@ class RegisterAllocationPass : public Pass {
void ExpireOldIntervals(Instr *start);
void UpdateInterval(const std::multiset<Interval>::iterator &it, Value *value,
Instr *start, Instr *end);
int ReuuseArgRegister(Instr *instr, Instr *start, Instr *end);
int AllocFreeRegister(Value *value, Instr *start, Instr *end);
int AllocBlockedRegister(IRBuilder &builder, Value *value, Instr *start,
Instr *end);

View File

@ -79,7 +79,7 @@ void Runtime::ResetBlocks() { pending_reset_ = true; }
RuntimeBlock *Runtime::CompileBlock(uint32_t addr) {
PROFILER_SCOPE_F("runtime");
// LOG(INFO) << "Compiling block 0x" << std::hex << addr;
LOG(INFO) << "Compiling block 0x" << std::hex << addr;
std::unique_ptr<IRBuilder> builder = frontend_->BuildBlock(addr);
if (!builder) {

View File

@ -256,8 +256,8 @@ void SH4::InitMemory() {
void SH4::InitContext() {
memset(&ctx_, 0, sizeof(ctx_));
ctx_.sh4 = this;
// ctx_.pc = 0xa0000000;
ctx_.pc = 0x0c010000;
ctx_.pc = 0xa0000000;
// ctx_.pc = 0x0c010000;
ctx_.pr = 0xdeadbeef;
#define SH4_REG(addr, name, flags, default, reset, sleep, standby, type) \
if (default != HELD) { \

View File

@ -24,8 +24,8 @@ Emulator::Emulator(System &sys)
processor_(scheduler_, memory_),
holly_(scheduler_, memory_, processor_) {
rt_frontend_ = new SH4Frontend(memory_);
rt_backend_ = new InterpreterBackend(memory_);
// rt_backend_ = new X64Backend(*memory_);
// rt_backend_ = new InterpreterBackend(memory_);
rt_backend_ = new X64Backend(memory_);
rb_ = new GLBackend(sys);
}

View File

@ -24,8 +24,8 @@ void RunSH4Test(const SH4Test &test) {
// initialize runtime
frontend::sh4::SH4Frontend rt_frontend(memory);
// backend::x64::X64Backend rt_backend(memory);
backend::interpreter::InterpreterBackend rt_backend(memory);
backend::x64::X64Backend rt_backend(memory);
// backend::interpreter::InterpreterBackend rt_backend(memory);
Runtime runtime(memory);
ASSERT_TRUE(runtime.Init(&rt_frontend, &rt_backend));