diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index 0573c928f..5ab38f41f 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -3206,17 +3206,36 @@ table->AddSequence(OPCODE_COMPARE_EXCHANGE, [](X64Emitter& e, Instr*& i) { }); table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](X64Emitter& e, Instr*& i) { - if (i->dest->type == INT32_TYPE) { - // dest = old_value = InterlockedExchange(src1 = address, src2 = new_value); + // dest = old_value = InterlockedExchange(src1 = address, src2 = new_value); + if (i->Match(SIG_TYPE_I32, SIG_TYPE_I64, SIG_TYPE_I32)) { Reg32 dest, src2; Reg64 src1; e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0, i->src2.value, src2, 0); + Reg64 real_src1 = src1; + if (dest.getIdx() == src1.getIdx()) { + e.mov(TEMP_REG, src1); + real_src1 = TEMP_REG; + } e.mov(dest, src2); e.lock(); - e.xchg(e.dword[src1], dest); + e.xchg(e.dword[real_src1], dest); e.EndOp(dest, src1, src2); + } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I64, SIG_TYPE_I32C)) { + Reg32 dest; + Reg64 src1; + e.BeginOp(i->dest, dest, REG_DEST, + i->src1.value, src1, 0); + Reg64 real_src1 = src1; + if (dest.getIdx() == src1.getIdx()) { + e.mov(TEMP_REG, src1); + real_src1 = TEMP_REG; + } + e.mov(dest, i->src2.value->constant.i32); + e.lock(); + e.xchg(e.dword[real_src1], dest); + e.EndOp(dest, src1); } else { ASSERT_INVALID_TYPE(); } diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl index 0daac5d64..749e84901 100644 --- a/src/alloy/backend/x64/lowering/op_utils.inl +++ b/src/alloy/backend/x64/lowering/op_utils.inl @@ -1034,7 +1034,9 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn, if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { vvv_fn(e, *i, dest, src1, src2); } else { - UNIMPLEMENTED_SEQ(); + e.movaps(e.xmm0, src3); + e.movaps(dest, src1); + vvv_fn(e, *i, dest, src2, e.xmm0); } } else { e.movaps(dest, src1); diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 8ae38d80d..3a9e6d142 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -42,7 +42,7 @@ const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = { }; const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = { - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index ca13354a6..e006bf3f9 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -98,10 +98,10 @@ public: // Reserved: rsp // Scratch: rax/rcx/rdx // xmm0-1 - // Available: rbx, r12-r15 (maybe r8-r11, rbp, rsi, rdi?) - // xmm2-xmm15 + // Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?) + // xmm6-xmm15 (save to get xmm2-xmm5) static const int GPR_COUNT = 5; - static const int XMM_COUNT = 14; + static const int XMM_COUNT = 10; static void SetupReg(hir::Value* v, Xbyak::Reg8& r) { auto idx = gpr_reg_map_[v->reg.index];