Re-enabling x64 backend, fixing many bugs.

Ben Vanik 2014-02-10 23:24:46 -08:00
parent 4a584129d2
commit 74c9df6697
11 changed files with 320 additions and 333 deletions

View File

@@ -11,6 +11,6 @@
'includes': [
'ivm/sources.gypi',
#'x64/sources.gypi',
'x64/sources.gypi',
],
}

View File

@@ -2500,7 +2500,7 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
} else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
// dest_src ^= 0xFFFF...
if (dest != src) {
if (dest.getIdx() != src.getIdx()) {
e.movaps(dest, src);
}
e.mov(e.rax, XMMCONSTBASE);
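
The recurring fix in this file replaces Xbyak's whole-operand comparison with an explicit index comparison: the aliasing checks only care whether two operands name the same physical register, not whether they also agree in kind and width. A minimal sketch of the distinction (the SameReg helper is hypothetical, added purely for illustration):

#include <xbyak/xbyak.h>

// Hypothetical helper: true when two operands occupy the same
// physical register, regardless of operand width or kind.
static bool SameReg(const Xbyak::Operand& a, const Xbyak::Operand& b) {
  return a.getIdx() == b.getIdx();
}

void CopyIfDistinct(Xbyak::CodeGenerator& e,
                    const Xbyak::Reg32& dest, const Xbyak::Reg32& src) {
  if (!SameReg(dest, src)) {
    e.mov(dest, src);  // only emit the copy when the registers differ
  }
}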
@@ -2697,7 +2697,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
Reg32 dest, src1;
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0);
if (dest != src1) {
if (dest.getIdx() != src1.getIdx()) {
e.mov(dest, src1);
e.bswap(dest);
} else {
@@ -2708,7 +2708,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
Reg64 dest, src1;
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0);
if (dest != src1) {
if (dest.getIdx() != src1.getIdx()) {
e.mov(dest, src1);
e.bswap(dest);
} else {
@@ -2972,7 +2972,7 @@ table->AddSequence(OPCODE_PERMUTE, [](X64Emitter& e, Instr*& i) {
(((control >> 18) & 0x1) << 1) |
(((control >> 10) & 0x1) << 2) |
(((control >> 2) & 0x1) << 3);
if (dest != src3) {
if (dest.getIdx() != src3.getIdx()) {
e.pshufd(dest, src2, src_control);
e.pshufd(e.xmm0, src3, src_control);
e.blendps(dest, e.xmm0, blend_control);
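
For context, this hunk builds the 4-bit immediate that blendps expects by sampling one bit per 32-bit lane from the packed control word; in a vperm-style permute (presumably matching Altivec semantics) each selector byte picks one of eight source lanes, and bit 2 of the byte (selectors 4-7) means the lane comes from src3 rather than src2. A worked example with an illustrative control value:

// Illustrative control word: lane selectors 0, 2, 4, 6 (one per byte).
uint32_t control = 0x00020406;
uint32_t blend_control =
    (((control >> 26) & 0x1) << 0) |  // byte 3: selector 0 -> src2 lane
    (((control >> 18) & 0x1) << 1) |  // byte 2: selector 2 -> src2 lane
    (((control >> 10) & 0x1) << 2) |  // byte 1: selector 4 -> src3 lane
    (((control >>  2) & 0x1) << 3);   // byte 0: selector 6 -> src3 lane
// blend_control == 0b1100: set bits take the pshufd(src3) lane,
// clear bits keep the pshufd(src2) lane.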

View File

@@ -311,9 +311,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
if (op == VECTOR_CMP_EQ) {
// Commutative, so simple.
Xmm real_src;
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
real_src = src2;
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
real_src = src1;
} else {
e.movaps(dest, src1);
@@ -334,9 +334,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
// Float GT/GE must be emulated.
if (op == VECTOR_CMP_GT) {
// Have to swap: src2 < src1.
if (dest == src2) {
if (dest.getIdx() == src2.getIdx()) {
e.cmpltps(dest, src1);
} else if (dest == src1) {
} else if (dest.getIdx() == src1.getIdx()) {
e.movaps(e.xmm0, src1);
e.movaps(dest, src2);
e.cmpltps(dest, e.xmm0);
@@ -346,9 +346,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
}
} else if (op == VECTOR_CMP_GE) {
// Have to swap: src2 <= src1.
if (dest == src2) {
if (dest.getIdx() == src2.getIdx()) {
e.cmpleps(dest, src1);
} else if (dest == src1) {
} else if (dest.getIdx() == src1.getIdx()) {
e.movaps(e.xmm0, src1);
e.movaps(dest, src2);
e.cmpleps(dest, e.xmm0);
@@ -362,9 +362,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
} else {
// Integer types are easier.
Xmm real_src;
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
real_src = src2;
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
e.movaps(e.xmm0, src2);
e.movaps(dest, src1);
real_src = e.xmm0;
@@ -429,7 +429,7 @@ void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn,
T& dest, T& src1) {
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0);
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
v_fn(e, *i, dest);
} else {
e.mov(dest, src1);
@@ -486,9 +486,9 @@ void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn,
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0,
i->src2.value, src2, 0);
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vv_fn(e, *i, dest, src2);
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vv_fn(e, *i, dest, src1);
} else {
@@ -511,7 +511,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
i->src1.value, src1, 0);
if (dest.getBit() <= 32) {
// 32-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()));
} else {
e.mov(dest, src1);
@@ -519,7 +519,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
}
} else {
// 64-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
e.mov(TEMP_REG, src2->constant.i64);
vv_fn(e, *i, dest, TEMP_REG);
} else {
@@ -537,7 +537,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
i->src2.value, src2, 0);
if (dest.getBit() <= 32) {
// 32-bit.
if (dest == src2) {
if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT()));
} else {
@@ -559,7 +559,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
}
} else {
// 64-bit.
if (dest == src2) {
if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
e.mov(TEMP_REG, src1->constant.i64);
vv_fn(e, *i, dest, TEMP_REG);
@@ -669,14 +669,19 @@ void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn,
i->src1.value, src1, 0,
i->src2.value, src2, 0,
i->src3.value, src3, 0);
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vvv_fn(e, *i, dest, src2, src3);
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vvv_fn(e, *i, dest, src1, src3);
} else {
UNIMPLEMENTED_SEQ();
}
} else if (dest.getIdx() == src3.getIdx()) {
auto Ntx = TEMP_LIKE(src3);
e.mov(Ntx, src3);
e.mov(dest, src1);
vvv_fn(e, *i, dest, src2, Ntx);
} else {
e.mov(dest, src1);
vvv_fn(e, *i, dest, src2, src3);
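
The new dest==src3 branch above plugs an operand-aliasing bug: when dest and src3 land in the same register, the fallback path's mov(dest, src1) would overwrite src3 before vvv_fn reads it. A minimal sketch of the hazard and the fix (EmitTernary is hypothetical; rcx stands in for the scratch register TEMP_LIKE would pick):

void EmitTernary(Xbyak::CodeGenerator& e,
                 const Xbyak::Reg64& dest, const Xbyak::Reg64& src1,
                 const Xbyak::Reg64& src2, const Xbyak::Reg64& src3) {
  if (dest.getIdx() == src3.getIdx()) {
    e.mov(e.rcx, src3);  // stash src3: the next mov would clobber it
    e.mov(dest, src1);
    // ... emit the operation on (dest, src2, rcx) ...
  } else {
    e.mov(dest, src1);   // safe: src3 lives in a different register
    // ... emit the operation on (dest, src2, src3) ...
  }
}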
@@ -691,7 +696,7 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
i->src2.value, src2, 0);
if (dest.getBit() <= 32) {
// 32-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT()));
} else if (dest == src2) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
@@ -709,10 +714,10 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
}
} else {
// 64-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
e.mov(TEMP_REG, src3->constant.i64);
vvv_fn(e, *i, dest, src2, TEMP_REG);
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
e.mov(TEMP_REG, src3->constant.i64);
vvv_fn(e, *i, dest, src1, TEMP_REG);
@@ -740,9 +745,9 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
i->src3.value, src3, 0);
if (dest.getBit() <= 32) {
// 32-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3);
} else if (dest == src3) {
} else if (dest.getIdx() == src3.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src1);
} else {
@@ -758,10 +763,10 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
}
} else {
// 64-bit.
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
e.mov(TEMP_REG, src2->constant.i64);
vvv_fn(e, *i, dest, TEMP_REG, src3);
} else if (dest == src3) {
} else if (dest.getIdx() == src3.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
e.mov(TEMP_REG, src2->constant.i64);
vvv_fn(e, *i, dest, src1, TEMP_REG);
@@ -817,16 +822,20 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
IntTernaryOpVVC<int8_t>(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value);
//
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) {
Reg8 dest, src1, src3;
Reg8 dest, src1;
Reg8 src3;
IntTernaryOpVCV<int8_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) {
Reg16 dest, src1, src3;
Reg16 dest, src1;
Reg8 src3;
IntTernaryOpVCV<int16_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) {
Reg32 dest, src1, src3;
Reg32 dest, src1;
Reg8 src3;
IntTernaryOpVCV<int32_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) {
Reg64 dest, src1, src3;
Reg64 dest, src1;
Reg8 src3;
IntTernaryOpVCV<int64_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
} else {
ASSERT_INVALID_TYPE();
@@ -856,7 +865,7 @@ void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
e.mov(e.rax, (uint64_t)src1->constant.i64);
e.movq(dest, e.rax);
} else {
UNIMPLEMENTED_SEQ();
LoadXmmConstant(e, dest, src1->constant.v128);
}
v_fn(e, *i, dest, dest);
e.EndOp(dest);
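
LoadXmmConstant itself is not shown in this diff; one plausible way to materialize a 128-bit constant without a memory pool, sketched here as an assumption rather than the commit's actual helper:

// Assumed sketch: build a v128 constant from two 64-bit halves.
void LoadV128(Xbyak::CodeGenerator& e, const Xbyak::Xmm& dest,
              uint64_t lo, uint64_t hi) {
  e.mov(e.rax, lo);          // rax is scratch throughout this backend
  e.movq(dest, e.rax);       // dest[63:0] = lo, upper half zeroed
  e.mov(e.rax, hi);
  e.pinsrq(dest, e.rax, 1);  // dest[127:64] = hi (requires SSE4.1)
}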
@@ -901,9 +910,9 @@ void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0,
i->src2.value, src2, 0);
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vv_fn(e, *i, dest, src2);
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vv_fn(e, *i, dest, src1);
} else {
@@ -934,7 +943,7 @@ void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
}
vv_fn(e, *i, dest, src1);
} else {
if (dest != src1) {
if (dest.getIdx() != src1.getIdx()) {
e.movaps(dest, src1);
}
if (src2->type == FLOAT32_TYPE) {
@@ -967,7 +976,7 @@ void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
vv_fn(e, *i, dest, src2);
} else {
auto real_src2 = src2;
if (dest == src2) {
if (dest.getIdx() == src2.getIdx()) {
e.movaps(e.xmm0, src2);
real_src2 = e.xmm0;
}
@@ -1010,9 +1019,9 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
i->src1.value, src1, 0,
i->src2.value, src2, 0,
i->src3.value, src3, 0);
if (dest == src1) {
if (dest.getIdx() == src1.getIdx()) {
vvv_fn(e, *i, dest, src2, src3);
} else if (dest == src2) {
} else if (dest.getIdx() == src2.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vvv_fn(e, *i, dest, src1, src3);
} else {
@@ -1021,7 +1030,7 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
vvv_fn(e, *i, e.xmm0, src2, src3);
e.movaps(dest, e.xmm0);
}
} else if (dest == src3) {
} else if (dest.getIdx() == src3.getIdx()) {
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
vvv_fn(e, *i, dest, src1, src2);
} else {

View File

@@ -45,14 +45,14 @@ int X64Backend::Initialize() {
0,
"gpr",
MachineInfo::RegisterSet::INT_TYPES,
10,
X64Emitter::GPR_COUNT,
};
machine_info_.register_sets[1] = {
1,
"xmm",
MachineInfo::RegisterSet::FLOAT_TYPES |
MachineInfo::RegisterSet::VEC_TYPES,
10,
X64Emitter::XMM_COUNT,
};
code_cache_ = new X64CodeCache();

View File

@@ -36,6 +36,16 @@ static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
} // namespace alloy
const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
Operand::RBX,
Operand::R12, Operand::R13, Operand::R14, Operand::R15,
};
const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = {
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
};
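
These maps are the core of the new scheme: the register allocator hands out small dense indices (0..GPR_COUNT-1 per register class), and the emitter translates them to physical encodings only when binding operands, instead of tracking a 32-bit live/active bitmask itself. A minimal sketch of the translation, assuming the maps above (GprFromAllocIndex is a hypothetical name):

// Dense allocator index -> physical callee-saved GPR.
Xbyak::Reg64 GprFromAllocIndex(int reg_index) {
  static const uint32_t map[5] = {
    Xbyak::Operand::RBX,
    Xbyak::Operand::R12, Xbyak::Operand::R13,
    Xbyak::Operand::R14, Xbyak::Operand::R15,
  };
  return Xbyak::Reg64(map[reg_index]);  // e.g. index 1 -> r12
}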
X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
runtime_(backend->runtime()),
backend_(backend),
@@ -43,7 +53,6 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
allocator_(allocator),
current_instr_(0),
CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
xe_zero_struct(&reg_state_, sizeof(reg_state_));
}
X64Emitter::~X64Emitter() {
@@ -99,28 +108,6 @@ void* X64Emitter::Emplace(size_t stack_size) {
}
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
// These are the registers we will not be using. All others are fair game.
const uint32_t reserved_regs =
GetRegBit(rax) | // scratch
GetRegBit(rcx) | // arg
GetRegBit(rdx) | // arg/clobbered
GetRegBit(rsp) |
GetRegBit(rbp) |
GetRegBit(rsi) |
GetRegBit(rdi) |
GetRegBit(r8) | // arg/clobbered
GetRegBit(xmm0) | // scratch
GetRegBit(xmm1) | // sometimes used for scratch, could be fixed
// TODO(benvanik): save so that we can use these.
GetRegBit(r9) |
GetRegBit(r10) |
GetRegBit(r11) |
GetRegBit(xmm2) |
GetRegBit(xmm3) |
GetRegBit(xmm4) |
GetRegBit(xmm5);
// Calculate stack size. We need to align things to their natural sizes.
// This could be much better (sort by type/etc).
auto locals = builder->locals();
@@ -164,8 +151,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
auto lowering_table = backend_->lowering_table();
reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
// Body.
auto block = builder->first_block();
while (block) {
@@ -176,11 +161,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
label = label->next;
}
// Reset reg allocation state.
// If we start keeping regs across blocks this needs to change.
// We mark a few active so that the allocator doesn't use them.
ResetRegisters(reserved_regs);
// Add instructions.
// The table will process sequences of instructions to (try to)
// generate optimal code.
@@ -211,201 +191,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
return 0;
}
void X64Emitter::ResetRegisters(uint32_t reserved_regs) {
// Just need to reset the register for each live value.
uint32_t live_regs = reg_state_.live_regs;
for (size_t n = 0; n < 32; n++, live_regs >>= 1) {
if (live_regs & 0x1) {
auto v = reg_state_.reg_values[n];
if (v) {
v->reg.index = -1;
}
}
reg_state_.reg_values[n] = 0;
}
reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
}
void X64Emitter::EvictStaleRegisters() {
// NOTE: if we are getting called it's because we *need* a register.
// We must get rid of something.
uint32_t current_ordinal = current_instr_ ?
current_instr_->ordinal : 0xFFFFFFFF;
// Remove any register with no more uses.
uint32_t new_live_regs = 0;
for (size_t n = 0; n < 32; n++) {
uint32_t bit = 1 << n;
if (bit & reg_state_.active_regs) {
// Register is active and cannot be freed.
new_live_regs |= bit;
continue;
}
if (!(bit & reg_state_.live_regs)) {
// Register is not alive - nothing to do.
continue;
}
// Register is live, not active. Check and see if we get rid of it.
auto v = reg_state_.reg_values[n];
if (!v->last_use ||
v->last_use->ordinal < current_ordinal) {
reg_state_.reg_values[n] = NULL;
v->reg = -1;
continue;
}
// Register still in use.
new_live_regs |= bit;
}
// Hrm. We have spilled.
if (reg_state_.live_regs == new_live_regs) {
XEASSERTALWAYS();
}
reg_state_.live_regs = new_live_regs;
// Assert that live is a superset of active.
XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags) {
// If the value is already in a register, use it.
if (v0->reg != -1) {
// Already in a register. Mark active and return.
v0_idx = v0->reg;
reg_state_.active_regs |= 1 << v0_idx;
// Assert that live is a superset of active.
XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
return;
}
uint32_t avail_regs = 0;
if (IsIntType(v0->type)) {
if (v0_flags & REG_ABCD) {
avail_regs = B00001111;
} else {
avail_regs = 0xFFFF;
}
} else {
avail_regs = 0xFFFF0000;
}
uint32_t free_regs = avail_regs & ~reg_state_.live_regs;
if (!free_regs) {
// Need to evict something.
EvictStaleRegisters();
free_regs = avail_regs & ~reg_state_.live_regs;
XEASSERT(free_regs);
}
// Find the first available.
// We start from the MSB so that we get the non-rNx regs that are often
// in short supply.
_BitScanReverse((DWORD*)&v0_idx, free_regs);
reg_state_.active_regs |= 1 << v0_idx;
reg_state_.live_regs |= 1 << v0_idx;
v0->reg = v0_idx;
reg_state_.reg_values[v0_idx] = v0;
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
Value* v2, uint32_t& v2_idx, uint32_t v2_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
bool need_v2 = v2->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (!need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
}
void X64Emitter::FindFreeRegs(
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
Value* v3, uint32_t& v3_idx, uint32_t v3_flags) {
// TODO(benvanik): support REG_DEST reuse/etc.
// Grab all already-present registers first.
// This way we won't spill them trying to get new registers.
bool need_v0 = v0->reg == -1;
bool need_v1 = v1->reg == -1;
bool need_v2 = v2->reg == -1;
bool need_v3 = v3->reg == -1;
if (!need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (!need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (!need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
if (!need_v3) {
FindFreeRegs(v3, v3_idx, v3_flags);
}
// Grab any registers we still need. These calls may evict.
if (need_v0) {
FindFreeRegs(v0, v0_idx, v0_flags);
}
if (need_v1) {
FindFreeRegs(v1, v1_idx, v1_flags);
}
if (need_v2) {
FindFreeRegs(v2, v2_idx, v2_flags);
}
if (need_v3) {
FindFreeRegs(v3, v3_idx, v3_flags);
}
}
Instr* X64Emitter::Advance(Instr* i) {
auto next = i->next;
current_instr_ = next;

View File

@@ -56,90 +56,73 @@ public:
public:
template<typename V0>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
uint32_t v0_idx;
FindFreeRegs(v0, v0_idx, r0_flags);
SetupReg(v0_idx, r0);
SetupReg(v0, r0);
}
template<typename V0, typename V1>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags) {
uint32_t v0_idx, v1_idx;
FindFreeRegs(v0, v0_idx, r0_flags,
v1, v1_idx, r1_flags);
SetupReg(v0_idx, r0);
SetupReg(v1_idx, r1);
SetupReg(v0, r0);
SetupReg(v1, r1);
}
template<typename V0, typename V1, typename V2>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags,
hir::Value* v2, V2& r2, uint32_t r2_flags) {
uint32_t v0_idx, v1_idx, v2_idx;
FindFreeRegs(v0, v0_idx, r0_flags,
v1, v1_idx, r1_flags,
v2, v2_idx, r2_flags);
SetupReg(v0_idx, r0);
SetupReg(v1_idx, r1);
SetupReg(v2_idx, r2);
SetupReg(v0, r0);
SetupReg(v1, r1);
SetupReg(v2, r2);
}
template<typename V0, typename V1, typename V2, typename V3>
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
hir::Value* v1, V1& r1, uint32_t r1_flags,
hir::Value* v2, V2& r2, uint32_t r2_flags,
hir::Value* v3, V3& r3, uint32_t r3_flags) {
uint32_t v0_idx, v1_idx, v2_idx, v3_idx;
FindFreeRegs(v0, v0_idx, r0_flags,
v1, v1_idx, r1_flags,
v2, v2_idx, r2_flags,
v3, v3_idx, r3_flags);
SetupReg(v0_idx, r0);
SetupReg(v1_idx, r1);
SetupReg(v2_idx, r2);
SetupReg(v3_idx, r3);
SetupReg(v0, r0);
SetupReg(v1, r1);
SetupReg(v2, r2);
SetupReg(v3, r3);
}
template<typename V0>
void EndOp(V0& r0) {
reg_state_.active_regs = reg_state_.active_regs ^ GetRegBit(r0);
}
template<typename V0, typename V1>
void EndOp(V0& r0, V1& r1) {
reg_state_.active_regs = reg_state_.active_regs ^ (
GetRegBit(r0) | GetRegBit(r1));
}
template<typename V0, typename V1, typename V2>
void EndOp(V0& r0, V1& r1, V2& r2) {
reg_state_.active_regs = reg_state_.active_regs ^ (
GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2));
}
template<typename V0, typename V1, typename V2, typename V3>
void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
reg_state_.active_regs = reg_state_.active_regs ^ (
GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3));
}
void ResetRegisters(uint32_t reserved_regs);
void EvictStaleRegisters();
// Reserved: rsp
// Scratch: rax/rcx/rdx
// xmm0-1
// Available: rbx, r12-r15 (maybe r8-r11, rbp, rsi, rdi?)
// xmm2-xmm15
static const int GPR_COUNT = 5;
static const int XMM_COUNT = 14;
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags);
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags);
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags);
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
hir::Value* v3, uint32_t& v3_idx, uint32_t v3_flags);
static void SetupReg(uint32_t idx, Xbyak::Reg8& r) { r = Xbyak::Reg8(idx); }
static void SetupReg(uint32_t idx, Xbyak::Reg16& r) { r = Xbyak::Reg16(idx); }
static void SetupReg(uint32_t idx, Xbyak::Reg32& r) { r = Xbyak::Reg32(idx); }
static void SetupReg(uint32_t idx, Xbyak::Reg64& r) { r = Xbyak::Reg64(idx); }
static void SetupReg(uint32_t idx, Xbyak::Xmm& r) { r = Xbyak::Xmm(idx - 16); }
static uint32_t GetRegBit(const Xbyak::Reg8& r) { return 1 << r.getIdx(); }
static uint32_t GetRegBit(const Xbyak::Reg16& r) { return 1 << r.getIdx(); }
static uint32_t GetRegBit(const Xbyak::Reg32& r) { return 1 << r.getIdx(); }
static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); }
static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); }
static void SetupReg(hir::Value* v, Xbyak::Reg8& r) {
auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg8(idx);
}
static void SetupReg(hir::Value* v, Xbyak::Reg16& r) {
auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg16(idx);
}
static void SetupReg(hir::Value* v, Xbyak::Reg32& r) {
auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg32(idx);
}
static void SetupReg(hir::Value* v, Xbyak::Reg64& r) {
auto idx = gpr_reg_map_[v->reg.index];
r = Xbyak::Reg64(idx);
}
static void SetupReg(hir::Value* v, Xbyak::Xmm& r) {
auto idx = xmm_reg_map_[v->reg.index];
r = Xbyak::Xmm(idx);
}
hir::Instr* Advance(hir::Instr* i);
@@ -157,21 +140,15 @@ protected:
X64CodeCache* code_cache_;
XbyakAllocator* allocator_;
struct {
// Registers currently active within a begin/end op block. These
// cannot be reused.
uint32_t active_regs;
// Registers with values in them.
uint32_t live_regs;
// Current register values.
hir::Value* reg_values[32];
} reg_state_;
hir::Instr* current_instr_;
size_t source_map_count_;
Arena source_map_arena_;
size_t stack_size_;
static const uint32_t gpr_reg_map_[GPR_COUNT];
static const uint32_t xmm_reg_map_[XMM_COUNT];
};

View File

@@ -179,6 +179,76 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
break;
// TODO(benvanik): compares
case OPCODE_COMPARE_EQ:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantEQ(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_NE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantNE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SLT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSLT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SLE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSLE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SGT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSGT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_SGE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantSGE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_ULT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantULT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_ULE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantULE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_UGT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantUGT(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_COMPARE_UGE:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
bool value = i->src1.value->IsConstantUGE(i->src2.value);
i->dest->set_constant(value);
i->Remove();
}
break;
case OPCODE_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {

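The ten new compare cases are structurally identical: both sources constant, so evaluate, rewrite the destination as a constant, and drop the instruction. A hypothetical helper showing how the pattern could be factored (not part of the commit; it uses only the calls visible above):

typedef bool (Value::*ConstCmpFn)(Value* other) const;

static void FoldCompare(Instr* i, ConstCmpFn fn) {
  if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
    bool value = (i->src1.value->*fn)(i->src2.value);
    i->dest->set_constant(value);
    i->Remove();
  }
}

// Usage inside the opcode switch:
//   case OPCODE_COMPARE_SLT: FoldCompare(i, &Value::IsConstantSLT); break;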
View File

@@ -280,6 +280,7 @@ Value* PPCHIRBuilder::LoadCA() {
}
void PPCHIRBuilder::StoreCA(Value* value) {
value = Truncate(value, INT8_TYPE);
StoreContext(offsetof(PPCContext, xer_ca), value);
}
@@ -288,6 +289,7 @@ Value* PPCHIRBuilder::LoadSAT() {
}
void PPCHIRBuilder::StoreSAT(Value* value) {
value = Truncate(value, INT8_TYPE);
StoreContext(offsetof(PPCContext, vscr_sat), value);
}

View File

@@ -363,7 +363,7 @@ DEFINE_OPCODE(
OPCODE_ADD_CARRY,
"add_carry",
OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_COMMUNATIVE);
0);
DEFINE_OPCODE(
OPCODE_VECTOR_ADD,

View File

@@ -221,6 +221,150 @@ public:
(other->flags & VALUE_IS_CONSTANT) &&
constant.i64 != other->constant.i64;
}
bool IsConstantSLT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 < other->constant.i8;
case INT16_TYPE:
return constant.i16 < other->constant.i16;
case INT32_TYPE:
return constant.i32 < other->constant.i32;
case INT64_TYPE:
return constant.i64 < other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 < other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 < other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSLE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 <= other->constant.i8;
case INT16_TYPE:
return constant.i16 <= other->constant.i16;
case INT32_TYPE:
return constant.i32 <= other->constant.i32;
case INT64_TYPE:
return constant.i64 <= other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 <= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 <= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSGT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 > other->constant.i8;
case INT16_TYPE:
return constant.i16 > other->constant.i16;
case INT32_TYPE:
return constant.i32 > other->constant.i32;
case INT64_TYPE:
return constant.i64 > other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 > other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 > other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantSGE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return constant.i8 >= other->constant.i8;
case INT16_TYPE:
return constant.i16 >= other->constant.i16;
case INT32_TYPE:
return constant.i32 >= other->constant.i32;
case INT64_TYPE:
return constant.i64 >= other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 >= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 >= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantULT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 < (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 < (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 < (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 < (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 < other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 < other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantULE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 <= (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 <= (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 <= (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 <= (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 <= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 <= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantUGT(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 > (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 > (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 > (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 > (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 > other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 > other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
bool IsConstantUGE(Value* other) const {
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
switch (type) {
case INT8_TYPE:
return (uint8_t)constant.i8 >= (uint8_t)other->constant.i8;
case INT16_TYPE:
return (uint16_t)constant.i16 >= (uint16_t)other->constant.i16;
case INT32_TYPE:
return (uint32_t)constant.i32 >= (uint32_t)other->constant.i32;
case INT64_TYPE:
return (uint64_t)constant.i64 >= (uint64_t)other->constant.i64;
case FLOAT32_TYPE:
return constant.f32 >= other->constant.f32;
case FLOAT64_TYPE:
return constant.f64 >= other->constant.f64;
default: XEASSERTALWAYS(); return false;
}
}
uint32_t AsUint32();
uint64_t AsUint64();

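The unsigned variants above differ from the signed ones only in casting through the matching unsigned type before comparing, since the constant union stores signed fields; the float cases stay as plain comparisons because unsigned ordering has no meaning there. A standalone demonstration of why the cast matters:

#include <cassert>
#include <cstdint>

int main() {
  int8_t a = -1;  // bit pattern 0xFF
  int8_t b = 1;
  assert(a < b);                    // signed view:   -1 < 1   (SLT)
  assert((uint8_t)a > (uint8_t)b);  // unsigned view: 255 > 1  (UGT)
  return 0;
}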
View File

@@ -58,7 +58,7 @@ Runtime::~Runtime() {
// TODO(benvanik): based on compiler support
#include <alloy/backend/ivm/ivm_backend.h>
//#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_backend.h>
int Runtime::Initialize(Frontend* frontend, Backend* backend) {
// Must be initialized by subclass before calling into this.