mirror of https://github.com/inolen/redream.git
only push / pop callee-saved registers that are modified
This commit is contained in:
parent
5b3aad63b3
commit
1265209ab6
|
@ -9,18 +9,28 @@ using namespace dreavm::jit::backend;
|
|||
using namespace dreavm::jit::backend::x64;
|
||||
using namespace dreavm::jit::ir;
|
||||
|
||||
static const Register x64_registers[] = {{"rbx", ir::VALUE_INT_MASK},
|
||||
{"rbp", ir::VALUE_INT_MASK},
|
||||
{"r12", ir::VALUE_INT_MASK},
|
||||
{"r13", ir::VALUE_INT_MASK},
|
||||
{"r14", ir::VALUE_INT_MASK},
|
||||
{"r15", ir::VALUE_INT_MASK},
|
||||
{"xmm6", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm7", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm8", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm9", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm10", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm11", ir::VALUE_FLOAT_MASK}};
|
||||
namespace dreavm {
|
||||
namespace jit {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
const Register x64_registers[] = {{"rbx", ir::VALUE_INT_MASK},
|
||||
{"rbp", ir::VALUE_INT_MASK},
|
||||
{"r12", ir::VALUE_INT_MASK},
|
||||
{"r13", ir::VALUE_INT_MASK},
|
||||
{"r14", ir::VALUE_INT_MASK},
|
||||
{"r15", ir::VALUE_INT_MASK},
|
||||
{"xmm6", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm7", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm8", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm9", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm10", ir::VALUE_FLOAT_MASK},
|
||||
{"xmm11", ir::VALUE_FLOAT_MASK}};
|
||||
|
||||
const int x64_num_registers = sizeof(x64_registers) / sizeof(Register);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
X64Backend::X64Backend(Memory &memory) : Backend(memory), emitter_(memory) {}
|
||||
|
||||
|
|
|
@ -10,6 +10,9 @@ namespace jit {
|
|||
namespace backend {
|
||||
namespace x64 {
|
||||
|
||||
extern const Register x64_registers[];
|
||||
extern const int x64_num_registers;
|
||||
|
||||
class X64Backend : public Backend {
|
||||
public:
|
||||
X64Backend(hw::Memory &memory);
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "core/core.h"
|
||||
#include "emu/profiler.h"
|
||||
#include "jit/backend/x64/x64_backend.h"
|
||||
#include "jit/backend/x64/x64_emitter.h"
|
||||
|
||||
using namespace dreavm;
|
||||
|
@ -61,6 +62,14 @@ static const Xbyak::Reg *reg_map_64[] = {
|
|||
&Xbyak::util::xmm6, &Xbyak::util::xmm7, &Xbyak::util::xmm8,
|
||||
&Xbyak::util::xmm9, &Xbyak::util::xmm10, &Xbyak::util::xmm11};
|
||||
|
||||
// map register ids coming from IR values for callee saved registers. use
|
||||
// nullptr to specify that the register isn't saved
|
||||
static const Xbyak::Reg *callee_save_map[] = {
|
||||
&Xbyak::util::rbx, &Xbyak::util::rbp, &Xbyak::util::r12, &Xbyak::util::r13,
|
||||
&Xbyak::util::r14, &Xbyak::util::r15, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr};
|
||||
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
static const Xbyak::Reg &int_arg0 = Xbyak::util::rcx;
|
||||
static const Xbyak::Reg &int_arg1 = Xbyak::util::rdx;
|
||||
|
@ -91,9 +100,21 @@ static X64Emit x64_emitters[NUM_OPCODES];
|
|||
const Instr *instr)
|
||||
|
||||
X64Emitter::X64Emitter(Memory &memory)
|
||||
: memory_(memory), c_(1024 * 1024 * 8), arena_(1024) {}
|
||||
: memory_(memory), c_(1024 * 1024 * 8), arena_(1024) {
|
||||
modified_marker_ = 0;
|
||||
modified_ = new int[x64_num_registers];
|
||||
|
||||
void X64Emitter::Reset() { c_.reset(); }
|
||||
Reset();
|
||||
}
|
||||
|
||||
X64Emitter::~X64Emitter() { delete[] modified_; }
|
||||
|
||||
void X64Emitter::Reset() {
|
||||
c_.reset();
|
||||
|
||||
modified_marker_ = 0;
|
||||
memset(modified_, modified_marker_, sizeof(int) * x64_num_registers);
|
||||
}
|
||||
|
||||
bool X64Emitter::Emit(IRBuilder &builder, X64Fn *fn) {
|
||||
PROFILER_RUNTIME("X64Emitter::Emit");
|
||||
|
@ -102,95 +123,26 @@ bool X64Emitter::Emit(IRBuilder &builder, X64Fn *fn) {
|
|||
// is about to emitted to
|
||||
*fn = c_.getCurr<X64Fn>();
|
||||
|
||||
// reset arena holding temporaries used while emitting
|
||||
// reset emit state
|
||||
arena_.Reset();
|
||||
|
||||
// allocate the epilog label
|
||||
epilog_label_ = AllocLabel();
|
||||
|
||||
// assign local offsets
|
||||
int stack_size = STACK_SIZE;
|
||||
for (auto local : builder.locals()) {
|
||||
int type_size = SizeForType(local->type());
|
||||
stack_size = dreavm::align(stack_size, type_size);
|
||||
local->set_offset(builder.AllocConstant(stack_size));
|
||||
stack_size += type_size;
|
||||
}
|
||||
|
||||
// stack must be 16 byte aligned
|
||||
stack_size = dreavm::align(stack_size, 16);
|
||||
|
||||
// add 8 for return address which will be pushed when this is called
|
||||
stack_size += 8;
|
||||
|
||||
// emit prolog
|
||||
// FIXME only push registers that're used
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
c_.push(Xbyak::util::rdi);
|
||||
c_.push(Xbyak::util::rsi);
|
||||
#endif
|
||||
c_.push(Xbyak::util::rbx);
|
||||
c_.push(Xbyak::util::rbp);
|
||||
c_.push(Xbyak::util::r12);
|
||||
c_.push(Xbyak::util::r13);
|
||||
c_.push(Xbyak::util::r14);
|
||||
c_.push(Xbyak::util::r15);
|
||||
|
||||
// reserve stack space for rdi copy
|
||||
c_.sub(Xbyak::util::rsp, stack_size);
|
||||
c_.mov(c_.qword[Xbyak::util::rsp + STACK_OFFSET_GUEST_CONTEXT], int_arg0);
|
||||
c_.mov(c_.qword[Xbyak::util::rsp + STACK_OFFSET_MEMORY], int_arg1);
|
||||
|
||||
// generate labels for each block
|
||||
for (auto block : builder.blocks()) {
|
||||
Xbyak::Label *lbl = AllocLabel();
|
||||
SetLabel(block, lbl);
|
||||
}
|
||||
|
||||
// emit each instruction
|
||||
for (auto block : builder.blocks()) {
|
||||
c_.L(GetLabel(block));
|
||||
|
||||
for (auto instr : block->instrs()) {
|
||||
X64Emit emit = x64_emitters[instr->op()];
|
||||
CHECK(emit, "Failed to find emitter for %s", Opnames[instr->op()]);
|
||||
|
||||
// try to generate the x64 code. if the codegen buffer overflows let the
|
||||
// backend know so it can reset the cache and try again
|
||||
try {
|
||||
emit(*this, memory_, c_, instr);
|
||||
} catch (const Xbyak::Error &e) {
|
||||
if (e == Xbyak::ERR_CODE_IS_TOO_BIG) {
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_FATAL("X64 codegen failure, %s", e.what());
|
||||
}
|
||||
// try to generate the x64 code. if the codegen buffer overflows let the
|
||||
// backend know so it can reset the cache and try again
|
||||
try {
|
||||
int stack_size = 0;
|
||||
EmitProlog(builder, &stack_size);
|
||||
EmitBody(builder);
|
||||
EmitEpilog(builder, stack_size);
|
||||
c_.ready();
|
||||
} catch (const Xbyak::Error &e) {
|
||||
if (e == Xbyak::ERR_CODE_IS_TOO_BIG) {
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_FATAL("X64 codegen failure, %s", e.what());
|
||||
}
|
||||
|
||||
// emit prolog
|
||||
c_.L(epilog_label());
|
||||
|
||||
// reset stack
|
||||
c_.add(Xbyak::util::rsp, stack_size);
|
||||
|
||||
// TODO only pop registers that're used
|
||||
c_.pop(Xbyak::util::r15);
|
||||
c_.pop(Xbyak::util::r14);
|
||||
c_.pop(Xbyak::util::r13);
|
||||
c_.pop(Xbyak::util::r12);
|
||||
c_.pop(Xbyak::util::rbp);
|
||||
c_.pop(Xbyak::util::rbx);
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
c_.pop(Xbyak::util::rsi);
|
||||
c_.pop(Xbyak::util::rdi);
|
||||
#endif
|
||||
c_.ret();
|
||||
|
||||
// patch up relocations
|
||||
c_.ready();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -206,6 +158,114 @@ Xbyak::Address *X64Emitter::AllocAddress(const Xbyak::Address &from) {
|
|||
return addr;
|
||||
}
|
||||
|
||||
void X64Emitter::EmitProlog(IRBuilder &builder, int *out_stack_size) {
|
||||
int stack_size = STACK_SIZE;
|
||||
|
||||
// align locals
|
||||
for (auto local : builder.locals()) {
|
||||
int type_size = SizeForType(local->type());
|
||||
stack_size = dreavm::align(stack_size, type_size);
|
||||
local->set_offset(builder.AllocConstant(stack_size));
|
||||
stack_size += type_size;
|
||||
}
|
||||
|
||||
// stack must be 16 byte aligned
|
||||
stack_size = dreavm::align(stack_size, 16);
|
||||
|
||||
// add 8 for return address which will be pushed when this is called
|
||||
stack_size += 8;
|
||||
CHECK_EQ((stack_size + 8) % 16, 0);
|
||||
|
||||
// mark which registers have been modified
|
||||
modified_marker_++;
|
||||
|
||||
for (auto block : builder.blocks()) {
|
||||
for (auto instr : block->instrs()) {
|
||||
Value *result = instr->result();
|
||||
if (!result) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int i = result->reg();
|
||||
if (i == NO_REGISTER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
modified_[i] = modified_marker_;
|
||||
}
|
||||
}
|
||||
|
||||
// push the callee-saved registers which have been modified
|
||||
int pushed = 0;
|
||||
|
||||
for (int i = 0; i < x64_num_registers; i++) {
|
||||
const Xbyak::Reg *reg = callee_save_map[i];
|
||||
if (!reg) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (modified_[i] == modified_marker_) {
|
||||
c_.push(*reg);
|
||||
pushed++;
|
||||
}
|
||||
}
|
||||
|
||||
// if an odd amount of push instructions are emitted stack_size needs to be
|
||||
// adjusted to keep the stack aligned
|
||||
if ((pushed % 2) == 1) {
|
||||
stack_size += 8;
|
||||
}
|
||||
|
||||
// adjust stack pointer
|
||||
c_.sub(Xbyak::util::rsp, stack_size);
|
||||
|
||||
// save off arguments to stack in case they need to be restored
|
||||
c_.mov(c_.qword[Xbyak::util::rsp + STACK_OFFSET_GUEST_CONTEXT], int_arg0);
|
||||
c_.mov(c_.qword[Xbyak::util::rsp + STACK_OFFSET_MEMORY], int_arg1);
|
||||
|
||||
*out_stack_size = stack_size;
|
||||
}
|
||||
|
||||
void X64Emitter::EmitBody(IRBuilder &builder) {
|
||||
// generate labels for each block
|
||||
for (auto block : builder.blocks()) {
|
||||
Xbyak::Label *lbl = AllocLabel();
|
||||
SetLabel(block, lbl);
|
||||
}
|
||||
|
||||
// emit each instruction
|
||||
for (auto block : builder.blocks()) {
|
||||
c_.L(GetLabel(block));
|
||||
|
||||
for (auto instr : block->instrs()) {
|
||||
X64Emit emit = x64_emitters[instr->op()];
|
||||
CHECK(emit, "Failed to find emitter for %s", Opnames[instr->op()]);
|
||||
emit(*this, memory_, c_, instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void X64Emitter::EmitEpilog(IRBuilder &builder, int stack_size) {
|
||||
c_.L(epilog_label());
|
||||
|
||||
// adjust stack pointer
|
||||
c_.add(Xbyak::util::rsp, stack_size);
|
||||
|
||||
// pop callee-saved registers which have been modified
|
||||
for (int i = x64_num_registers - 1; i >= 0; i--) {
|
||||
const Xbyak::Reg *reg = callee_save_map[i];
|
||||
if (!reg) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (modified_[i] == modified_marker_) {
|
||||
c_.pop(*reg);
|
||||
}
|
||||
}
|
||||
|
||||
c_.ret();
|
||||
}
|
||||
|
||||
// Get the register / local allocated for the supplied value. If the value is
|
||||
// a constant, copy it to a temporary register. The size argument can be
|
||||
// overridden to get a truncated version of the value.
|
||||
|
|
|
@ -30,6 +30,7 @@ typedef uint32_t (*X64Fn)(void *guest_ctx, hw::Memory *memory);
|
|||
class X64Emitter {
|
||||
public:
|
||||
X64Emitter(hw::Memory &memory);
|
||||
~X64Emitter();
|
||||
|
||||
Xbyak::Label &epilog_label() { return *epilog_label_; }
|
||||
|
||||
|
@ -53,10 +54,20 @@ class X64Emitter {
|
|||
Xbyak::Label *AllocLabel();
|
||||
Xbyak::Address *AllocAddress(const Xbyak::Address &addr);
|
||||
|
||||
void EmitProlog(ir::IRBuilder &builder, int *stack_size);
|
||||
void EmitBody(ir::IRBuilder &builder);
|
||||
void EmitEpilog(ir::IRBuilder &builder, int stack_size);
|
||||
|
||||
int AlignLocals(ir::IRBuilder &builder);
|
||||
int PushModifiedRegisters(ir::IRBuilder &builder);
|
||||
void PopModifiedRegisters();
|
||||
|
||||
hw::Memory &memory_;
|
||||
Xbyak::CodeGenerator c_;
|
||||
Arena arena_;
|
||||
Xbyak::Label *epilog_label_;
|
||||
int modified_marker_;
|
||||
int *modified_;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -154,9 +154,9 @@ TextureHandle GLBackend::RegisterTexture(PixelFormat format, FilterMode filter,
|
|||
}
|
||||
|
||||
void GLBackend::FreeTexture(TextureHandle handle) {
|
||||
GLuint &gltex = textures_[handle];
|
||||
glDeleteTextures(1, &gltex);
|
||||
gltex = 0;
|
||||
GLuint *gltex = &textures_[handle];
|
||||
glDeleteTextures(1, gltex);
|
||||
*gltex = 0;
|
||||
}
|
||||
|
||||
void GLBackend::BeginFrame() {
|
||||
|
|
Loading…
Reference in New Issue