diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index 52265b1b3..90877011f 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -189,14 +189,14 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { auto str = (const char*)i->src1.offset; //lb.Comment(str); //UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_NOP, [](X64Emitter& e, Instr*& i) { // If we got this, chances are we want it. e.nop(); - i = i->next; + i = e.Advance(i); return true; }); @@ -208,7 +208,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { // TODO(benvanik): translate source offsets for mapping? We're just passing // down the original offset - it may be nice to have two. e.MarkSourceOffset(i); - i = i->next; + i = e.Advance(i); return true; }); @@ -216,7 +216,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { // TODO(benvanik): insert a call to the debug break function to let the // debugger know. e.db(0xCC); - i = i->next; + i = e.Advance(i); return true; }); @@ -229,7 +229,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.db(0xCC); e.L(".x"); e.outLocalLabel(); - i = i->next; + i = e.Advance(i); return true; }); @@ -237,7 +237,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { // TODO(benvanik): insert a call to the trap function to let the // debugger know. 
e.db(0xCC); - i = i->next; + i = e.Advance(i); return true; }); @@ -250,7 +250,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.db(0xCC); e.L(".x"); e.outLocalLabel(); - i = i->next; + i = e.Advance(i); return true; }); @@ -262,7 +262,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.mov(e.rax, (uint64_t)Dummy); e.call(e.rax); UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -276,7 +276,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { UNIMPLEMENTED_SEQ(); e.L(".x"); e.outLocalLabel(); - i = i->next; + i = e.Advance(i); return true; }); @@ -284,7 +284,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.mov(e.rax, (uint64_t)Dummy); e.call(e.rax); UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -298,19 +298,19 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { UNIMPLEMENTED_SEQ(); e.L(".x"); e.outLocalLabel(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_RETURN, [](X64Emitter& e, Instr*& i) { e.ret(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SET_RETURN_ADDRESS, [](X64Emitter& e, Instr*& i) { //UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -321,7 +321,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { table->AddSequence(OPCODE_BRANCH, [](X64Emitter& e, Instr*& i) { auto target = i->src1.label; e.jmp(target->name, e.T_NEAR); - i = i->next; + i = e.Advance(i); return true; }); @@ -329,7 +329,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { CheckBoolean(e, i->src1.value); auto target = i->src2.label; e.je(target->name, e.T_NEAR); - i = i->next; + i = e.Advance(i); return true; }); @@ -337,7 +337,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { 
CheckBoolean(e, i->src1.value); auto target = i->src2.label; e.jne(target->name, e.T_NEAR); - i = i->next; + i = e.Advance(i); return true; }); @@ -379,14 +379,14 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) { // Need a matrix. UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -436,7 +436,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { UNIMPLEMENTED_SEQ(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -486,7 +486,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { UNIMPLEMENTED_SEQ(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -536,31 +536,31 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { UNIMPLEMENTED_SEQ(); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ROUND, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_CONVERT_I2F, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_CONVERT_F2I, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -572,19 +572,19 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { table->AddSequence(OPCODE_LOAD_VECTOR_SHL, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_LOAD_VECTOR_SHR, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; 
}); table->AddSequence(OPCODE_LOAD_CLOCK, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -632,7 +632,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -691,7 +691,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -755,7 +755,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { if (!i->src1.value->IsConstant()) { e.EndOp(addr_off); } - i = i->next; + i = e.Advance(i); return true; }); @@ -829,13 +829,13 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { if (!i->src1.value->IsConstant()) { e.EndOp(addr_off); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_PREFETCH, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -845,19 +845,19 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { table->AddSequence(OPCODE_MAX, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_MIN, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SELECT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -867,7 +867,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.BeginOp(i->dest, dest, REG_DEST); e.setnz(dest); e.EndOp(dest); - i = i->next; + i = e.Advance(i); return true; }); @@ -877,7 +877,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.BeginOp(i->dest, dest, REG_DEST); e.setz(dest); e.EndOp(dest); - i = i->next; + i = e.Advance(i); return 
true; }); @@ -889,7 +889,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setne(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -901,7 +901,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.sete(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -913,7 +913,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setge(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -925,7 +925,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setg(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -937,7 +937,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setle(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -949,7 +949,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setl(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -961,7 +961,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setae(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -973,7 +973,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.seta(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -985,7 +985,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setbe(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); @@ -997,55 +997,55 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { e.setb(dest); } }); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_DID_CARRY, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_DID_OVERFLOW, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; 
}); table->AddSequence(OPCODE_DID_SATURATE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_COMPARE_EQ, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_COMPARE_SGT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_COMPARE_SGE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_COMPARE_UGT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_COMPARE_UGE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -1241,97 +1241,97 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ADD_CARRY, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SUB, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_MUL, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_MUL_HI, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_DIV, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_MUL_ADD, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_MUL_SUB, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i 
= i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_POW2, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_LOG2, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_DOT_PRODUCT_4, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -1487,61 +1487,61 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_OR, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_XOR, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SHL, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_SHL, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = 
e.Advance(i); return true; }); table->AddSequence(OPCODE_SHR, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_SHR, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SHA, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_VECTOR_SHA, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -1589,7 +1589,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { UNIMPLEMENTED_SEQ(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -1630,7 +1630,7 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { ASSERT_INVALID_TYPE(); } - i = i->next; + i = e.Advance(i); return true; }); @@ -1683,49 +1683,49 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { } else { UNIMPLEMENTED_SEQ(); } - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_INSERT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_EXTRACT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SPLAT, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_PERMUTE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_SWIZZLE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_PACK, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); 
table->AddSequence(OPCODE_UNPACK, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); @@ -1735,25 +1735,25 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { table->AddSequence(OPCODE_COMPARE_EXCHANGE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ATOMIC_ADD, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); table->AddSequence(OPCODE_ATOMIC_SUB, [](X64Emitter& e, Instr*& i) { UNIMPLEMENTED_SEQ(); - i = i->next; + i = e.Advance(i); return true; }); } diff --git a/src/alloy/backend/x64/lowering/lowering_table.cc b/src/alloy/backend/x64/lowering/lowering_table.cc index a8df8b7bc..6c5c8468b 100644 --- a/src/alloy/backend/x64/lowering/lowering_table.cc +++ b/src/alloy/backend/x64/lowering/lowering_table.cc @@ -9,6 +9,7 @@ #include +#include #include using namespace alloy; @@ -62,7 +63,7 @@ int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) { // No sequence found! XELOGE("Unable to process HIR opcode %s", instr->opcode->name); return 1; - instr = instr->next; + instr = e.Advance(instr); } } diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 5b7e94147..c0053b848 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -39,6 +39,7 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) : backend_(backend), code_cache_(backend->code_cache()), allocator_(allocator), + current_instr_(0), CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) { xe_zero_struct(&reg_state_, sizeof(reg_state_)); } @@ -145,6 +146,7 @@ int X64Emitter::Emit(HIRBuilder* builder) { // Add instructions. 
// The table will process sequences of instructions to (try to) // generate optimal code. + current_instr_ = block->instr_head; if (lowering_table->ProcessBlock(*this, block)) { return 1; } @@ -163,6 +165,50 @@ int X64Emitter::Emit(HIRBuilder* builder) { return 0; } +void X64Emitter::EvictStaleRegs() { + // Releases every live register whose value has no uses at or after the + // current instruction, then publishes the surviving set as live_regs. + // NOTE: if we are getting called it's because we *need* a register. + // We must get rid of something. + + uint32_t current_ordinal = current_instr_->ordinal; + + // Remove any register with no more uses. + uint32_t new_live_regs = 0; + for (size_t n = 0; n < 32; n++) { + // Unsigned literal: 1 << 31 overflows a signed int. + uint32_t bit = 1u << n; + if (bit & reg_state_.active_regs) { + // Register is active and cannot be freed. + new_live_regs |= bit; + continue; + } + if (!(bit & reg_state_.live_regs)) { + // Register is not alive - nothing to do. + continue; + } + + // Register is live, not active. Check and see if we get rid of it. + auto v = reg_state_.reg_values[n]; + if (v->last_use->ordinal < current_ordinal) { + // Value is dead: drop the mapping on both sides. + reg_state_.reg_values[n] = NULL; + v->reg = -1; + continue; + } + + // Value is used at or after this instruction, so keep the register live. + new_live_regs |= bit; + } + + // Hrm. Nothing could be freed - we would have to spill. + if (reg_state_.live_regs == new_live_regs) { + XEASSERTALWAYS(); + } + + reg_state_.live_regs = new_live_regs; +} + void X64Emitter::FindFreeRegs( Value* v0, uint32_t& v0_idx, uint32_t v0_flags) { // If the value is already in a register, use it. @@ -183,15 +229,23 @@ void X64Emitter::FindFreeRegs( } else { avail_regs = 0xFFFF0000; } - uint32_t free_regs = avail_regs & ~reg_state_.active_regs; - if (free_regs) { - // Just take one. - _BitScanReverse((DWORD*)&v0_idx, free_regs); - } else { + uint32_t free_regs = avail_regs & ~reg_state_.live_regs; + if (!free_regs) { // Need to evict something. - XEASSERTALWAYS(); + EvictStaleRegs(); + // Eviction rewrites live_regs, so the free set must be recomputed. + free_regs = avail_regs & ~reg_state_.live_regs; + if (!free_regs) { + // Nothing inside avail_regs was freed. + XEASSERTALWAYS(); + } } + // Find the first available. + // We start from the MSB so that we get the non-rNx regs that are often + // in short supply. 
+ _BitScanReverse((DWORD*)&v0_idx, free_regs); + reg_state_.active_regs |= 1 << v0_idx; reg_state_.live_regs |= 1 << v0_idx; v0->reg = v0_idx; @@ -204,8 +243,8 @@ void X64Emitter::FindFreeRegs( // TODO(benvanik): support REG_DEST reuse/etc. // Grab all already-present registers first. // This way we won't spill them trying to get new registers. - bool need_v0 = v0->reg != -1; - bool need_v1 = v1->reg != -1; + bool need_v0 = v0->reg == -1; + bool need_v1 = v1->reg == -1; if (!need_v0) { FindFreeRegs(v0, v0_idx, v0_flags); } @@ -228,9 +267,9 @@ void X64Emitter::FindFreeRegs( // TODO(benvanik): support REG_DEST reuse/etc. // Grab all already-present registers first. // This way we won't spill them trying to get new registers. - bool need_v0 = v0->reg != -1; - bool need_v1 = v1->reg != -1; - bool need_v2 = v2->reg != -1; + bool need_v0 = v0->reg == -1; + bool need_v1 = v1->reg == -1; + bool need_v2 = v2->reg == -1; if (!need_v0) { FindFreeRegs(v0, v0_idx, v0_flags); } @@ -260,10 +299,10 @@ void X64Emitter::FindFreeRegs( // TODO(benvanik): support REG_DEST reuse/etc. // Grab all already-present registers first. // This way we won't spill them trying to get new registers. 
- bool need_v0 = v0->reg != -1; - bool need_v1 = v1->reg != -1; - bool need_v2 = v2->reg != -1; - bool need_v3 = v3->reg != -1; + bool need_v0 = v0->reg == -1; + bool need_v1 = v1->reg == -1; + bool need_v2 = v2->reg == -1; + bool need_v3 = v3->reg == -1; if (!need_v0) { FindFreeRegs(v0, v0_idx, v0_flags); } @@ -291,6 +330,12 @@ void X64Emitter::FindFreeRegs( } } +Instr* X64Emitter::Advance(Instr* i) { + auto next = i->next; + current_instr_ = next; + return next; +} + void X64Emitter::MarkSourceOffset(Instr* i) { auto entry = source_map_arena_.Alloc(); entry->source_offset = i->src1.offset; diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index 8dd54399a..3125d0c07 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -112,6 +112,8 @@ public: GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3)); } + void EvictStaleRegs(); + void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags); void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags, hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags); @@ -134,6 +136,8 @@ public: static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); } static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); } + hir::Instr* Advance(hir::Instr* i); + void MarkSourceOffset(hir::Instr* i); private: @@ -154,6 +158,7 @@ private: // Current register values. 
hir::Value* reg_values[32]; } reg_state_; + hir::Instr* current_instr_; size_t source_map_count_; Arena source_map_arena_; diff --git a/src/alloy/compiler/passes/value_reduction_pass.cc b/src/alloy/compiler/passes/value_reduction_pass.cc index 8e299a308..78367f35a 100644 --- a/src/alloy/compiler/passes/value_reduction_pass.cc +++ b/src/alloy/compiler/passes/value_reduction_pass.cc @@ -32,6 +32,9 @@ ValueReductionPass::~ValueReductionPass() { } void ValueReductionPass::ComputeLastUse(Value* value) { + // TODO(benvanik): compute during construction? + // Note that this list isn't sorted (unfortunately), so we have to scan + // them all. uint32_t max_ordinal = 0; Value::Use* last_use = NULL; auto use = value->use_head; @@ -42,7 +45,7 @@ void ValueReductionPass::ComputeLastUse(Value* value) { } use = use->next; } - value->tag = last_use->instr; + value->last_use = last_use->instr; } int ValueReductionPass::Run(HIRBuilder* builder) { @@ -73,30 +76,30 @@ int ValueReductionPass::Run(HIRBuilder* builder) { OpcodeSignatureType src3_type = GET_OPCODE_SIG_TYPE_SRC3(info->signature); if (src1_type == OPCODE_SIG_TYPE_V && !instr->src1.value->IsConstant()) { auto v = instr->src1.value; - if (!v->tag) { + if (!v->last_use) { ComputeLastUse(v); } - if (v->tag == instr) { + if (v->last_use == instr) { // Available. ordinals.set(v->ordinal, false); } } if (src2_type == OPCODE_SIG_TYPE_V && !instr->src2.value->IsConstant()) { auto v = instr->src2.value; - if (!v->tag) { + if (!v->last_use) { ComputeLastUse(v); } - if (v->tag == instr) { + if (v->last_use == instr) { // Available. ordinals.set(v->ordinal, false); } } if (src3_type == OPCODE_SIG_TYPE_V && !instr->src3.value->IsConstant()) { auto v = instr->src3.value; - if (!v->tag) { + if (!v->last_use) { ComputeLastUse(v); } - if (v->tag == instr) { + if (v->last_use == instr) { // Available. 
ordinals.set(v->ordinal, false); } diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index 508d37e13..ae8c5c93a 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -419,6 +419,7 @@ Value* HIRBuilder::AllocValue(TypeName type) { value->flags = 0; value->def = NULL; value->use_head = NULL; + value->last_use = NULL; value->tag = NULL; value->reg = -1; return value; @@ -432,6 +433,7 @@ Value* HIRBuilder::CloneValue(Value* source) { value->constant.v128 = source->constant.v128; value->def = NULL; value->use_head = NULL; + value->last_use = NULL; value->tag = NULL; value->reg = -1; return value; diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h index 51957da5f..9c50e9807 100644 --- a/src/alloy/hir/value.h +++ b/src/alloy/hir/value.h @@ -70,6 +70,8 @@ public: Instr* def; Use* use_head; + // NOTE: for performance reasons this is not maintained during construction. + Instr* last_use; // TODO(benvanik): remove to shrink size. void* tag;