diff --git a/src/alloy/backend/sources.gypi b/src/alloy/backend/sources.gypi
index a7e2c0928..41419ac7a 100644
--- a/src/alloy/backend/sources.gypi
+++ b/src/alloy/backend/sources.gypi
@@ -11,6 +11,6 @@
   'includes': [
     'ivm/sources.gypi',
-    #'x64/sources.gypi',
+    'x64/sources.gypi',
   ],
 }
diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc
index 754fd0d07..0573c928f 100644
--- a/src/alloy/backend/x64/lowering/lowering_sequences.cc
+++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc
@@ -2500,7 +2500,7 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
   } else if (IsVecType(i->dest->type)) {
     XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       // dest_src ^= 0xFFFF...
-      if (dest != src) {
+      if (dest.getIdx() != src.getIdx()) {
        e.movaps(dest, src);
      }
      e.mov(e.rax, XMMCONSTBASE);
@@ -2697,7 +2697,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
     Reg32 dest, src1;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src1, 0);
-    if (dest != src1) {
+    if (dest.getIdx() != src1.getIdx()) {
       e.mov(dest, src1);
       e.bswap(dest);
     } else {
@@ -2708,7 +2708,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
     Reg64 dest, src1;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src1, 0);
-    if (dest != src1) {
+    if (dest.getIdx() != src1.getIdx()) {
       e.mov(dest, src1);
       e.bswap(dest);
     } else {
@@ -2972,7 +2972,7 @@ table->AddSequence(OPCODE_PERMUTE, [](X64Emitter& e, Instr*& i) {
           (((control >> 18) & 0x1) << 1) |
           (((control >> 10) & 0x1) << 2) |
           (((control >> 2) & 0x1) << 3);
-      if (dest != src3) {
+      if (dest.getIdx() != src3.getIdx()) {
         e.pshufd(dest, src2, src_control);
         e.pshufd(e.xmm0, src3, src_control);
         e.blendps(dest, e.xmm0, blend_control);
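
The recurring change in lowering_sequences.cc (and in op_utils.inl below) replaces direct Xbyak operand comparison with a comparison of physical register indices. A minimal sketch of the pattern, assuming only that Xbyak register operands expose getIdx(); the SameReg helper is illustrative and not part of the patch:

    // Compare the physical register index rather than the operand object, so
    // the "is dest already in the same register?" test does not depend on how
    // Xbyak defines operand equality (which considers kind and width too).
    template <typename RegA, typename RegB>
    static bool SameReg(const RegA& a, const RegB& b) {
      return a.getIdx() == b.getIdx();
    }

    // Usage, mirroring the OPCODE_NOT sequence above: only emit the copy when
    // dest and src occupy different XMM registers.
    if (!SameReg(dest, src)) {
      e.movaps(dest, src);
    }
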
diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl
index ce8f019db..0daac5d64 100644
--- a/src/alloy/backend/x64/lowering/op_utils.inl
+++ b/src/alloy/backend/x64/lowering/op_utils.inl
@@ -311,9 +311,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_signed) {
   if (op == VECTOR_CMP_EQ) {
     // Commutative, so simple.
     Xmm real_src;
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       real_src = src2;
-    } else if (dest == src2) {
+    } else if (dest.getIdx() == src2.getIdx()) {
       real_src = src1;
     } else {
       e.movaps(dest, src1);
@@ -334,9 +334,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_signed) {
       // Float GT/GE must be emulated.
       if (op == VECTOR_CMP_GT) {
         // Have to swap: src2 < src1.
-        if (dest == src2) {
+        if (dest.getIdx() == src2.getIdx()) {
           e.cmpltps(dest, src1);
-        } else if (dest == src1) {
+        } else if (dest.getIdx() == src1.getIdx()) {
           e.movaps(e.xmm0, src1);
           e.movaps(dest, src2);
           e.cmpltps(dest, e.xmm0);
@@ -346,9 +346,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_signed) {
       } else if (op == VECTOR_CMP_GE) {
         // Have to swap: src2 <= src1.
-        if (dest == src2) {
+        if (dest.getIdx() == src2.getIdx()) {
           e.cmpleps(dest, src1);
-        } else if (dest == src1) {
+        } else if (dest.getIdx() == src1.getIdx()) {
           e.movaps(e.xmm0, src1);
           e.movaps(dest, src2);
           e.cmpleps(dest, e.xmm0);
@@ -362,9 +362,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_signed) {
   } else {
     // Integer types are easier.
     Xmm real_src;
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       real_src = src2;
-    } else if (dest == src2) {
+    } else if (dest.getIdx() == src2.getIdx()) {
       e.movaps(e.xmm0, src2);
       e.movaps(dest, src1);
       real_src = e.xmm0;
@@ -429,7 +429,7 @@ void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn, T& dest, T& src1) {
   e.BeginOp(i->dest, dest, REG_DEST,
             i->src1.value, src1, 0);
-  if (dest == src1) {
+  if (dest.getIdx() == src1.getIdx()) {
     v_fn(e, *i, dest);
   } else {
     e.mov(dest, src1);
@@ -486,9 +486,9 @@ void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn,
   e.BeginOp(i->dest, dest, REG_DEST,
             i->src1.value, src1, 0,
             i->src2.value, src2, 0);
-  if (dest == src1) {
+  if (dest.getIdx() == src1.getIdx()) {
     vv_fn(e, *i, dest, src2);
-  } else if (dest == src2) {
+  } else if (dest.getIdx() == src2.getIdx()) {
     if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
       vv_fn(e, *i, dest, src1);
     } else {
@@ -511,7 +511,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
             i->src1.value, src1, 0);
   if (dest.getBit() <= 32) {
     // 32-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()));
     } else {
       e.mov(dest, src1);
@@ -519,7 +519,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
     }
   } else {
     // 64-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       e.mov(TEMP_REG, src2->constant.i64);
       vv_fn(e, *i, dest, TEMP_REG);
     } else {
@@ -537,7 +537,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
             i->src2.value, src2, 0);
   if (dest.getBit() <= 32) {
     // 32-bit.
-    if (dest == src2) {
+    if (dest.getIdx() == src2.getIdx()) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
         vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT()));
       } else {
@@ -559,7 +559,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
     }
   } else {
     // 64-bit.
-    if (dest == src2) {
+    if (dest.getIdx() == src2.getIdx()) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
         e.mov(TEMP_REG, src1->constant.i64);
         vv_fn(e, *i, dest, TEMP_REG);
@@ -669,14 +669,19 @@ void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn,
             i->src1.value, src1, 0,
             i->src2.value, src2, 0,
             i->src3.value, src3, 0);
-  if (dest == src1) {
+  if (dest.getIdx() == src1.getIdx()) {
     vvv_fn(e, *i, dest, src2, src3);
-  } else if (dest == src2) {
+  } else if (dest.getIdx() == src2.getIdx()) {
     if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
       vvv_fn(e, *i, dest, src1, src3);
     } else {
       UNIMPLEMENTED_SEQ();
     }
+  } else if (dest.getIdx() == src3.getIdx()) {
+    auto Ntx = TEMP_LIKE(src3);
+    e.mov(Ntx, src3);
+    e.mov(dest, src1);
+    vvv_fn(e, *i, dest, src2, Ntx);
   } else {
     e.mov(dest, src1);
     vvv_fn(e, *i, dest, src2, src3);
@@ -691,7 +696,7 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
             i->src2.value, src2, 0);
   if (dest.getBit() <= 32) {
     // 32-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT()));
     } else if (dest == src2) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
@@ -709,10 +714,10 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
     }
   } else {
     // 64-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       e.mov(TEMP_REG, src3->constant.i64);
       vvv_fn(e, *i, dest, src2, TEMP_REG);
-    } else if (dest == src2) {
+    } else if (dest.getIdx() == src2.getIdx()) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
         e.mov(TEMP_REG, src3->constant.i64);
         vvv_fn(e, *i, dest, src1, TEMP_REG);
@@ -740,9 +745,9 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
             i->src3.value, src3, 0);
   if (dest.getBit() <= 32) {
     // 32-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3);
-    } else if (dest == src3) {
+    } else if (dest.getIdx() == src3.getIdx()) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
         vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src1);
       } else {
@@ -758,10 +763,10 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
     }
   } else {
     // 64-bit.
-    if (dest == src1) {
+    if (dest.getIdx() == src1.getIdx()) {
       e.mov(TEMP_REG, src2->constant.i64);
       vvv_fn(e, *i, dest, TEMP_REG, src3);
-    } else if (dest == src3) {
+    } else if (dest.getIdx() == src3.getIdx()) {
       if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
         e.mov(TEMP_REG, src2->constant.i64);
         vvv_fn(e, *i, dest, src1, TEMP_REG);
@@ -817,16 +822,20 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn vcv_fn) {
     IntTernaryOpVVC(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value);
   //
   } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) {
-    Reg8 dest, src1, src3;
+    Reg8 dest, src1;
+    Reg8 src3;
     IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
   } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) {
-    Reg16 dest, src1, src3;
+    Reg16 dest, src1;
+    Reg8 src3;
     IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
   } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) {
-    Reg32 dest, src1, src3;
+    Reg32 dest, src1;
+    Reg8 src3;
     IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
   } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) {
-    Reg64 dest, src1, src3;
+    Reg64 dest, src1;
+    Reg8 src3;
     IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
   } else {
     ASSERT_INVALID_TYPE();
@@ -856,7 +865,7 @@ void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
     e.mov(e.rax, (uint64_t)src1->constant.i64);
     e.movq(dest, e.rax);
   } else {
-    UNIMPLEMENTED_SEQ();
+    LoadXmmConstant(e, dest, src1->constant.v128);
   }
   v_fn(e, *i, dest, dest);
   e.EndOp(dest);
@@ -901,9 +910,9 @@ void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
   e.BeginOp(i->dest, dest, REG_DEST,
             i->src1.value, src1, 0,
             i->src2.value, src2, 0);
-  if (dest == src1) {
+  if (dest.getIdx() == src1.getIdx()) {
     vv_fn(e, *i, dest, src2);
-  } else if (dest == src2) {
+  } else if (dest.getIdx() == src2.getIdx()) {
     if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
       vv_fn(e, *i, dest, src1);
     } else {
@@ -934,7 +943,7 @@ void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
     }
     vv_fn(e, *i, dest, src1);
   } else {
-    if (dest != src1) {
+    if (dest.getIdx() != src1.getIdx()) {
       e.movaps(dest, src1);
     }
     if (src2->type == FLOAT32_TYPE) {
@@ -967,7 +976,7 @@ void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
     vv_fn(e, *i, dest, src2);
   } else {
     auto real_src2 = src2;
-    if (dest == src2) {
+    if (dest.getIdx() == src2.getIdx()) {
       e.movaps(e.xmm0, src2);
       real_src2 = e.xmm0;
     }
@@ -1010,9 +1019,9 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
             i->src1.value, src1, 0,
             i->src2.value, src2, 0,
             i->src3.value, src3, 0);
-  if (dest == src1) {
+  if (dest.getIdx() == src1.getIdx()) {
     vvv_fn(e, *i, dest, src2, src3);
-  } else if (dest == src2) {
+  } else if (dest.getIdx() == src2.getIdx()) {
     if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
       vvv_fn(e, *i, dest, src1, src3);
     } else {
@@ -1021,7 +1030,7 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
       vvv_fn(e, *i, e.xmm0, src2, src3);
       e.movaps(dest, e.xmm0);
     }
-  } else if (dest == src3) {
+  } else if (dest.getIdx() == src3.getIdx()) {
     if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
       vvv_fn(e, *i, dest, src1, src2);
     } else {
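
The new third branch in IntTernaryOpVVV closes an operand-aliasing hole: when dest and src3 share a register, the fallthrough path's e.mov(dest, src1) would clobber src3 before vvv_fn reads it. The fix preserves src3 in a same-width temporary first (TEMP_LIKE comes from the patch; it is assumed to yield a scratch register matching src3's width):

    // Hazard when dest aliases src3:
    //   e.mov(dest, src1);               // overwrites src3
    //   vvv_fn(e, *i, dest, src2, src3); // reads the clobbered value
    // Patched ordering:
    auto Ntx = TEMP_LIKE(src3);  // scratch register, same width as src3
    e.mov(Ntx, src3);            // save src3 before dest is written
    e.mov(dest, src1);
    vvv_fn(e, *i, dest, src2, Ntx);
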
diff --git a/src/alloy/backend/x64/x64_backend.cc b/src/alloy/backend/x64/x64_backend.cc
index 8c1968571..076ab1cbb 100644
--- a/src/alloy/backend/x64/x64_backend.cc
+++ b/src/alloy/backend/x64/x64_backend.cc
@@ -45,14 +45,14 @@ int X64Backend::Initialize() {
       0,
       "gpr",
       MachineInfo::RegisterSet::INT_TYPES,
-      10,
+      X64Emitter::GPR_COUNT,
   };
   machine_info_.register_sets[1] = {
       1,
       "xmm",
       MachineInfo::RegisterSet::FLOAT_TYPES |
      MachineInfo::RegisterSet::VEC_TYPES,
-      10,
+      X64Emitter::XMM_COUNT,
   };

   code_cache_ = new X64CodeCache();
diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc
index 4a1442ca5..8ae38d80d 100644
--- a/src/alloy/backend/x64/x64_emitter.cc
+++ b/src/alloy/backend/x64/x64_emitter.cc
@@ -36,6 +36,16 @@ static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;

 }  // namespace alloy

+const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
+  Operand::RBX,
+  Operand::R12, Operand::R13, Operand::R14, Operand::R15,
+};
+
+const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = {
+  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+
 X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
     runtime_(backend->runtime()),
     backend_(backend),
@@ -43,7 +53,6 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
     allocator_(allocator),
     current_instr_(0),
     CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
-  xe_zero_struct(&reg_state_, sizeof(reg_state_));
 }

 X64Emitter::~X64Emitter() {
@@ -99,28 +108,6 @@ void* X64Emitter::Emplace(size_t stack_size) {
 }

 int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
-  // These are the registers we will not be using. All others are fare game.
-  const uint32_t reserved_regs =
-      GetRegBit(rax) |    // scratch
-      GetRegBit(rcx) |    // arg
-      GetRegBit(rdx) |    // arg/clobbered
-      GetRegBit(rsp) |
-      GetRegBit(rbp) |
-      GetRegBit(rsi) |
-      GetRegBit(rdi) |
-      GetRegBit(r8) |     // arg/clobbered
-      GetRegBit(xmm0) |   // scratch
-      GetRegBit(xmm1) |   // sometimes used for scratch, could be fixed
-
-      // TODO(benvanik): save so that we can use these.
-      GetRegBit(r9) |
-      GetRegBit(r10) |
-      GetRegBit(r11) |
-      GetRegBit(xmm2) |
-      GetRegBit(xmm3) |
-      GetRegBit(xmm4) |
-      GetRegBit(xmm5);
-
   // Calculate stack size. We need to align things to their natural sizes.
   // This could be much better (sort by type/etc).
   auto locals = builder->locals();
@@ -164,8 +151,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {

   auto lowering_table = backend_->lowering_table();

-  reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
-
   // Body.
   auto block = builder->first_block();
   while (block) {
@@ -176,11 +161,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
       label = label->next;
     }

-    // Reset reg allocation state.
-    // If we start keeping regs across blocks this needs to change.
-    // We mark a few active so that the allocator doesn't use them.
-    ResetRegisters(reserved_regs);
-
     // Add instructions.
     // The table will process sequences of instructions to (try to)
     // generate optimal code.
@@ -211,201 +191,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
   return 0;
 }

-void X64Emitter::ResetRegisters(uint32_t reserved_regs) {
-  // Just need to reset the register for each live value.
-  uint32_t live_regs = reg_state_.live_regs;
-  for (size_t n = 0; n < 32; n++, live_regs >>= 1) {
-    if (live_regs & 0x1) {
-      auto v = reg_state_.reg_values[n];
-      if (v) {
-        v->reg.index = -1;
-      }
-    }
-    reg_state_.reg_values[n] = 0;
-  }
-  reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
-}
-
-void X64Emitter::EvictStaleRegisters() {
-  // NOTE: if we are getting called it's because we *need* a register.
-  // We must get rid of something.
-
-  uint32_t current_ordinal = current_instr_ ?
-      current_instr_->ordinal : 0xFFFFFFFF;
-
-  // Remove any register with no more uses.
-  uint32_t new_live_regs = 0;
-  for (size_t n = 0; n < 32; n++) {
-    uint32_t bit = 1 << n;
-    if (bit & reg_state_.active_regs) {
-      // Register is active and cannot be freed.
-      new_live_regs |= bit;
-      continue;
-    }
-    if (!(bit & reg_state_.live_regs)) {
-      // Register is not alive - nothing to do.
-      continue;
-    }
-
-    // Register is live, not active. Check and see if we get rid of it.
-    auto v = reg_state_.reg_values[n];
-    if (!v->last_use ||
-        v->last_use->ordinal < current_ordinal) {
-      reg_state_.reg_values[n] = NULL;
-      v->reg = -1;
-      continue;
-    }
-
-    // Register still in use.
-    new_live_regs |= bit;
-  }
-
-  // Hrm. We have spilled.
-  if (reg_state_.live_regs == new_live_regs) {
-    XEASSERTALWAYS();
-  }
-
-  reg_state_.live_regs = new_live_regs;
-
-  // Assert that live is a superset of active.
-  XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
-}
-
-void X64Emitter::FindFreeRegs(
-    Value* v0, uint32_t& v0_idx, uint32_t v0_flags) {
-  // If the value is already in a register, use it.
-  if (v0->reg != -1) {
-    // Already in a register. Mark active and return.
-    v0_idx = v0->reg;
-    reg_state_.active_regs |= 1 << v0_idx;
-
-    // Assert that live is a superset of active.
-    XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
-    return;
-  }
-
-  uint32_t avail_regs = 0;
-  if (IsIntType(v0->type)) {
-    if (v0_flags & REG_ABCD) {
-      avail_regs = B00001111;
-    } else {
-      avail_regs = 0xFFFF;
-    }
-  } else {
-    avail_regs = 0xFFFF0000;
-  }
-  uint32_t free_regs = avail_regs & ~reg_state_.live_regs;
-  if (!free_regs) {
-    // Need to evict something.
-    EvictStaleRegisters();
-    free_regs = avail_regs & ~reg_state_.live_regs;
-    XEASSERT(free_regs);
-  }
-
-  // Find the first available.
-  // We start from the MSB so that we get the non-rNx regs that are often
-  // in short supply.
-  _BitScanReverse((DWORD*)&v0_idx, free_regs);
-
-  reg_state_.active_regs |= 1 << v0_idx;
-  reg_state_.live_regs |= 1 << v0_idx;
-  v0->reg = v0_idx;
-  reg_state_.reg_values[v0_idx] = v0;
-}
-
-void X64Emitter::FindFreeRegs(
-    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-    Value* v1, uint32_t& v1_idx, uint32_t v1_flags) {
-  // TODO(benvanik): support REG_DEST reuse/etc.
-  // Grab all already-present registers first.
-  // This way we won't spill them trying to get new registers.
-  bool need_v0 = v0->reg == -1;
-  bool need_v1 = v1->reg == -1;
-  if (!need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (!need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-  // Grab any registers we still need. These calls may evict.
-  if (need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-}
-
-void X64Emitter::FindFreeRegs(
-    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-    Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-    Value* v2, uint32_t& v2_idx, uint32_t v2_flags) {
-  // TODO(benvanik): support REG_DEST reuse/etc.
-  // Grab all already-present registers first.
-  // This way we won't spill them trying to get new registers.
-  bool need_v0 = v0->reg == -1;
-  bool need_v1 = v1->reg == -1;
-  bool need_v2 = v2->reg == -1;
-  if (!need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (!need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-  if (!need_v2) {
-    FindFreeRegs(v2, v2_idx, v2_flags);
-  }
-  // Grab any registers we still need. These calls may evict.
-  if (need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-  if (need_v2) {
-    FindFreeRegs(v2, v2_idx, v2_flags);
-  }
-}
-
-void X64Emitter::FindFreeRegs(
-    Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-    Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-    Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
-    Value* v3, uint32_t& v3_idx, uint32_t v3_flags) {
-  // TODO(benvanik): support REG_DEST reuse/etc.
-  // Grab all already-present registers first.
-  // This way we won't spill them trying to get new registers.
-  bool need_v0 = v0->reg == -1;
-  bool need_v1 = v1->reg == -1;
-  bool need_v2 = v2->reg == -1;
-  bool need_v3 = v3->reg == -1;
-  if (!need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (!need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-  if (!need_v2) {
-    FindFreeRegs(v2, v2_idx, v2_flags);
-  }
-  if (!need_v3) {
-    FindFreeRegs(v3, v3_idx, v3_flags);
-  }
-  // Grab any registers we still need. These calls may evict.
-  if (need_v0) {
-    FindFreeRegs(v0, v0_idx, v0_flags);
-  }
-  if (need_v1) {
-    FindFreeRegs(v1, v1_idx, v1_flags);
-  }
-  if (need_v2) {
-    FindFreeRegs(v2, v2_idx, v2_flags);
-  }
-  if (need_v3) {
-    FindFreeRegs(v3, v3_idx, v3_flags);
-  }
-}
-
 Instr* X64Emitter::Advance(Instr* i) {
   auto next = i->next;
   current_instr_ = next;
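
With FindFreeRegs/EvictStaleRegisters gone, the emitter no longer allocates registers at all: a compiler-side allocator assigns each HIR Value a small index ahead of time, and emission becomes a pure table lookup into the fixed maps added above. An illustrative sketch of that lookup (the free functions are hypothetical; in the patch the equivalent logic lives in the SetupReg overloads in x64_emitter.h):

    // reg.index 0..4  selects rbx, r12, r13, r14, r15 (callee-saved GPRs);
    // reg.index 0..13 selects xmm2..xmm15 (xmm0/xmm1 remain scratch).
    Xbyak::Reg64 MapGpr64(const hir::Value* v) {
      return Xbyak::Reg64(gpr_reg_map_[v->reg.index]);
    }
    Xbyak::Xmm MapXmm(const hir::Value* v) {
      return Xbyak::Xmm(xmm_reg_map_[v->reg.index]);
    }
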
diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h
index 4962dab14..ca13354a6 100644
--- a/src/alloy/backend/x64/x64_emitter.h
+++ b/src/alloy/backend/x64/x64_emitter.h
@@ -56,90 +56,73 @@ public:
 public:
   template <typename V0>
   void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
-    uint32_t v0_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags);
-    SetupReg(v0_idx, r0);
+    SetupReg(v0, r0);
   }
   template <typename V0, typename V1>
   void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
                hir::Value* v1, V1& r1, uint32_t r1_flags) {
-    uint32_t v0_idx, v1_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
+    SetupReg(v0, r0);
+    SetupReg(v1, r1);
   }
   template <typename V0, typename V1, typename V2>
   void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
                hir::Value* v1, V1& r1, uint32_t r1_flags,
                hir::Value* v2, V2& r2, uint32_t r2_flags) {
-    uint32_t v0_idx, v1_idx, v2_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags,
-                 v2, v2_idx, r2_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
-    SetupReg(v2_idx, r2);
+    SetupReg(v0, r0);
+    SetupReg(v1, r1);
+    SetupReg(v2, r2);
   }
   template <typename V0, typename V1, typename V2, typename V3>
   void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
                hir::Value* v1, V1& r1, uint32_t r1_flags,
                hir::Value* v2, V2& r2, uint32_t r2_flags,
                hir::Value* v3, V3& r3, uint32_t r3_flags) {
-    uint32_t v0_idx, v1_idx, v2_idx, v3_idx;
-    FindFreeRegs(v0, v0_idx, r0_flags,
-                 v1, v1_idx, r1_flags,
-                 v2, v2_idx, r2_flags,
-                 v3, v3_idx, r3_flags);
-    SetupReg(v0_idx, r0);
-    SetupReg(v1_idx, r1);
-    SetupReg(v2_idx, r2);
-    SetupReg(v3_idx, r3);
+    SetupReg(v0, r0);
+    SetupReg(v1, r1);
+    SetupReg(v2, r2);
+    SetupReg(v3, r3);
   }
   template <typename V0>
   void EndOp(V0& r0) {
-    reg_state_.active_regs = reg_state_.active_regs ^ GetRegBit(r0);
   }
   template <typename V0, typename V1>
   void EndOp(V0& r0, V1& r1) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1));
   }
   template <typename V0, typename V1, typename V2>
   void EndOp(V0& r0, V1& r1, V2& r2) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2));
   }
   template <typename V0, typename V1, typename V2, typename V3>
   void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
-    reg_state_.active_regs = reg_state_.active_regs ^ (
-        GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3));
   }

-  void ResetRegisters(uint32_t reserved_regs);
-  void EvictStaleRegisters();
+  // Reserved:  rsp
+  // Scratch:   rax/rcx/rdx
+  //            xmm0-1
+  // Available: rbx, r12-r15 (maybe r8-r11, rbp, rsi, rdi?)
+  //            xmm2-xmm15
+  static const int GPR_COUNT = 5;
+  static const int XMM_COUNT = 14;

-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags);
-  void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
-                    hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
-                    hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
-                    hir::Value* v3, uint32_t& v3_idx, uint32_t v3_flags);
-
-  static void SetupReg(uint32_t idx, Xbyak::Reg8& r) { r = Xbyak::Reg8(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg16& r) { r = Xbyak::Reg16(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg32& r) { r = Xbyak::Reg32(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Reg64& r) { r = Xbyak::Reg64(idx); }
-  static void SetupReg(uint32_t idx, Xbyak::Xmm& r) { r = Xbyak::Xmm(idx - 16); }
-  static uint32_t GetRegBit(const Xbyak::Reg8& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg16& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg32& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); }
-  static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); }
+  static void SetupReg(hir::Value* v, Xbyak::Reg8& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg8(idx);
+  }
+  static void SetupReg(hir::Value* v, Xbyak::Reg16& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg16(idx);
+  }
+  static void SetupReg(hir::Value* v, Xbyak::Reg32& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg32(idx);
+  }
+  static void SetupReg(hir::Value* v, Xbyak::Reg64& r) {
+    auto idx = gpr_reg_map_[v->reg.index];
+    r = Xbyak::Reg64(idx);
+  }
+  static void SetupReg(hir::Value* v, Xbyak::Xmm& r) {
+    auto idx = xmm_reg_map_[v->reg.index];
+    r = Xbyak::Xmm(idx);
+  }

   hir::Instr* Advance(hir::Instr* i);

@@ -157,21 +140,15 @@ protected:
   X64CodeCache* code_cache_;
   XbyakAllocator* allocator_;

-  struct {
-    // Registers currently active within a begin/end op block. These
-    // cannot be reused.
-    uint32_t active_regs;
-    // Registers with values in them.
-    uint32_t live_regs;
-    // Current register values.
-    hir::Value* reg_values[32];
-  } reg_state_;
   hir::Instr* current_instr_;

   size_t source_map_count_;
   Arena source_map_arena_;

   size_t stack_size_;
+
+  static const uint32_t gpr_reg_map_[GPR_COUNT];
+  static const uint32_t xmm_reg_map_[XMM_COUNT];
 };
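
BeginOp/EndOp remain the sequence-level API, but they now only materialize preassigned registers: BeginOp resolves each Value through SetupReg, and the EndOp bodies are empty because there is no active/live bookkeeping left to undo. A typical call site, shaped after the OPCODE_BYTE_SWAP sequence earlier in this patch:

    Reg32 dest, src1;
    e.BeginOp(i->dest, dest, REG_DEST,   // dest = mapped GPR for i->dest
              i->src1.value, src1, 0);   // src1 = mapped GPR for i->src1
    if (dest.getIdx() != src1.getIdx()) {
      e.mov(dest, src1);                 // copy only when they differ
    }
    e.bswap(dest);
    e.EndOp(dest, src1);                 // now a no-op, kept for symmetry
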
diff --git a/src/alloy/compiler/passes/constant_propagation_pass.cc b/src/alloy/compiler/passes/constant_propagation_pass.cc
index 0bf269334..03a514a94 100644
--- a/src/alloy/compiler/passes/constant_propagation_pass.cc
+++ b/src/alloy/compiler/passes/constant_propagation_pass.cc
@@ -179,6 +179,76 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
         break;

       // TODO(benvanik): compares
+      case OPCODE_COMPARE_EQ:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantEQ(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_NE:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantNE(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_SLT:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantSLT(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_SLE:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantSLE(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_SGT:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantSGT(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_SGE:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantSGE(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_ULT:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantULT(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_ULE:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantULE(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_UGT:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantUGT(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
+      case OPCODE_COMPARE_UGE:
+        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+          bool value = i->src1.value->IsConstantUGE(i->src2.value);
+          i->dest->set_constant(value);
+          i->Remove();
+        }
+        break;
       case OPCODE_ADD:
         if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
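
Each new case applies the same fold: when both operands are constants, the comparison is evaluated at compile time, dest becomes a constant, and the compare instruction is deleted. An illustrative before/after in pseudo-HIR (syntax approximate, values hypothetical):

    ; before constant propagation
    v0 = 5          ; constant
    v1 = 7          ; constant
    v2 = compare_slt v0, v1
    branch_true v2, loop_head

    ; after: the compare is gone and v2 is the constant 1 (true), so a later
    ; pass can reduce branch_true to an unconditional branch.
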
diff --git a/src/alloy/frontend/ppc/ppc_hir_builder.cc b/src/alloy/frontend/ppc/ppc_hir_builder.cc
index 2fc49396a..dd25c4f8a 100644
--- a/src/alloy/frontend/ppc/ppc_hir_builder.cc
+++ b/src/alloy/frontend/ppc/ppc_hir_builder.cc
@@ -280,6 +280,7 @@ Value* PPCHIRBuilder::LoadCA() {
 }

 void PPCHIRBuilder::StoreCA(Value* value) {
+  value = Truncate(value, INT8_TYPE);
   StoreContext(offsetof(PPCContext, xer_ca), value);
 }

@@ -288,6 +289,7 @@ Value* PPCHIRBuilder::LoadSAT() {
 }

 void PPCHIRBuilder::StoreSAT(Value* value) {
+  value = Truncate(value, INT8_TYPE);
   StoreContext(offsetof(PPCContext, vscr_sat), value);
 }

diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl
index 4fc7bd9dd..baf214f25 100644
--- a/src/alloy/hir/opcodes.inl
+++ b/src/alloy/hir/opcodes.inl
@@ -363,7 +363,7 @@ DEFINE_OPCODE(
     OPCODE_ADD_CARRY,
     "add_carry",
     OPCODE_SIG_V_V_V_V,
-    OPCODE_FLAG_COMMUNATIVE);
+    0);

 DEFINE_OPCODE(
     OPCODE_VECTOR_ADD,
diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h
index c2c8ed7ae..4587efb19 100644
--- a/src/alloy/hir/value.h
+++ b/src/alloy/hir/value.h
@@ -221,6 +221,150 @@ public:
            (other->flags & VALUE_IS_CONSTANT) &&
            constant.i64 != other->constant.i64;
   }
+  bool IsConstantSLT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 < other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 < other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 < other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 < other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 < other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 < other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSLE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 <= other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 <= other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 <= other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 <= other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 <= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 <= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSGT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 > other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 > other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 > other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 > other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 > other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 > other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantSGE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return constant.i8 >= other->constant.i8;
+    case INT16_TYPE:
+      return constant.i16 >= other->constant.i16;
+    case INT32_TYPE:
+      return constant.i32 >= other->constant.i32;
+    case INT64_TYPE:
+      return constant.i64 >= other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 >= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 >= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantULT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 < (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 < (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 < (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 < (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 < other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 < other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantULE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 <= (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 <= (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 <= (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 <= (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 <= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 <= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantUGT(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 > (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 > (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 > (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 > (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 > other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 > other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }
+  bool IsConstantUGE(Value* other) const {
+    XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
+    switch (type) {
+    case INT8_TYPE:
+      return (uint8_t)constant.i8 >= (uint8_t)other->constant.i8;
+    case INT16_TYPE:
+      return (uint16_t)constant.i16 >= (uint16_t)other->constant.i16;
+    case INT32_TYPE:
+      return (uint32_t)constant.i32 >= (uint32_t)other->constant.i32;
+    case INT64_TYPE:
+      return (uint64_t)constant.i64 >= (uint64_t)other->constant.i64;
+    case FLOAT32_TYPE:
+      return constant.f32 >= other->constant.f32;
+    case FLOAT64_TYPE:
+      return constant.f64 >= other->constant.f64;
+    default: XEASSERTALWAYS(); return false;
+    }
+  }

   uint32_t AsUint32();
   uint64_t AsUint64();
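
The signed and unsigned variants exist because the same bit pattern orders differently under the two interpretations; the unsigned forms cast through the matching uintN_t exactly as above. A self-contained check of the INT8 case:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t a = static_cast<int8_t>(0xFF);  // -1 signed, 255 unsigned
      int8_t b = 1;
      // IsConstantSLT semantics: -1 < 1 is true.
      assert(a < b);
      // IsConstantULT semantics: 255 < 1 is false.
      assert(!(static_cast<uint8_t>(a) < static_cast<uint8_t>(b)));
      return 0;
    }
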
diff --git a/src/alloy/runtime/runtime.cc b/src/alloy/runtime/runtime.cc
index d39ac4220..3fc45a447 100644
--- a/src/alloy/runtime/runtime.cc
+++ b/src/alloy/runtime/runtime.cc
@@ -58,7 +58,7 @@ Runtime::~Runtime() {

 // TODO(benvanik): based on compiler support
 #include <alloy/backend/ivm/ivm_backend.h>
-//#include <alloy/backend/x64/x64_backend.h>
+#include <alloy/backend/x64/x64_backend.h>

 int Runtime::Initialize(Frontend* frontend, Backend* backend) {
   // Must be initialized by subclass before calling into this.