diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index f2052f719e..6fc0203640 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -1405,6 +1405,55 @@ struct llvm_sub_sat } }; +template , typename U = llvm_common_t> +struct llvm_extract +{ + using type = std::remove_extent_t; + + llvm_expr_t a1; + llvm_expr_t i2; + + static_assert(llvm_value_t::is_vector, "llvm_extract<>: invalid type"); + static_assert(llvm_value_t::is_int && !llvm_value_t::is_vector, "llvm_extract<>: invalid index type"); + + static constexpr bool is_ok = llvm_value_t::is_vector && + llvm_value_t::is_int && !llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = i2.eval(ir); + + return ir->CreateExtractElement(v1, v2); + } +}; + +template , typename U = llvm_common_t, typename V = llvm_common_t> +struct llvm_insert +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t i2; + llvm_expr_t a3; + + static_assert(llvm_value_t::is_vector, "llvm_insert<>: invalid type"); + static_assert(llvm_value_t::is_int && !llvm_value_t::is_vector, "llvm_insert<>: invalid index type"); + static_assert(std::is_same_v>, "llvm_insert<>: invalid element type"); + + static constexpr bool is_ok = llvm_extract::is_ok && + std::is_same_v>; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = i2.eval(ir); + const auto v3 = a3.eval(ir); + + return ir->CreateInsertElement(v1, v3, v2); + } +}; + class cpu_translator { protected: @@ -1559,6 +1608,30 @@ public: return llvm_sub_sat{std::forward(a), std::forward(b)}; } + template ::is_ok>> + static auto extract(T&& v, U&& i) + { + return llvm_extract{std::forward(v), std::forward(i)}; + } + + template >::is_ok>> + static auto extract(T&& v, u32 i) + { + return llvm_extract>{std::forward(v), llvm_const_int{i}}; + } + + template ::is_ok>> + static auto insert(T&& v, U&& i, V&& e) + { + return llvm_insert{std::forward(v), std::forward(i), std::forward(e)}; + } + + template , V>::is_ok>> + static auto insert(T&& v, u32 i, V&& e) + { + return llvm_insert, V>{std::forward(v), llvm_const_int{i}, std::forward(e)}; + } + // Average: (a + b + 1) >> 1 template inline auto avg(T a, T b) @@ -1580,22 +1653,6 @@ public: return result; } - template - auto insert(T v, u64 i, E e) - { - value_t result; - result.value = m_ir->CreateInsertElement(v.eval(m_ir), e.eval(m_ir), i); - return result; - } - - template - auto extract(T v, u64 i) - { - typename value_t::base result; - result.value = m_ir->CreateExtractElement(v.eval(m_ir), i); - return result; - } - template auto splat(u64 c) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 93355071e5..1ecacf7ad2 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -4326,7 +4326,7 @@ public: void WRCH(spu_opcode_t op) // { - const auto val = extract(get_vr(op.rt), 3); + const auto val = eval(extract(get_vr(op.rt), 3)); if (m_interp_magn) { @@ -5051,8 +5051,7 @@ public: const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto i = eval(~s & 0xf); auto r = build(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x03))); } void CHX(spu_opcode_t op) @@ -5060,8 +5059,7 @@ public: const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto i = eval(~s >> 1 & 0x7); auto r = build(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x0203))); } void CWX(spu_opcode_t op) @@ -5069,8 +5067,7 @@ public: const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto i = eval(~s >> 2 & 0x3); auto r = build(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x010203))); } void CDX(spu_opcode_t op) @@ -5078,8 +5075,7 @@ public: const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto i = eval(~s >> 3 & 0x1); auto r = build(0x18191a1b1c1d1e1f, 0x1011121314151617); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x01020304050607))); } void ROTQBI(spu_opcode_t op) @@ -5143,8 +5139,7 @@ public: const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); const auto i = eval(~a & 0xf); auto r = build(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x03))); } void CHD(spu_opcode_t op) @@ -5152,8 +5147,7 @@ public: const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); const auto i = eval(~a >> 1 & 0x7); auto r = build(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x0203))); } void CWD(spu_opcode_t op) @@ -5161,8 +5155,7 @@ public: const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); const auto i = eval(~a >> 2 & 0x3); auto r = build(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x010203))); } void CDD(spu_opcode_t op) @@ -5170,8 +5163,7 @@ public: const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); const auto i = eval(~a >> 3 & 0x1); auto r = build(0x18191a1b1c1d1e1f, 0x1011121314151617); - r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); - set_vr(op.rt, r); + set_vr(op.rt, insert(r, i, splat(0x01020304050607))); } void ROTQBII(spu_opcode_t op) @@ -5646,22 +5638,22 @@ public: if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x01020304050607) { vtype = get_type(); - _new = extract(get_vr(op.ra), 1).value; + _new = extract(get_vr(op.ra), 1).eval(m_ir); } else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x010203) { vtype = get_type(); - _new = extract(get_vr(op.ra), 3).value; + _new = extract(get_vr(op.ra), 3).eval(m_ir); } else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x0203) { vtype = get_type(); - _new = extract(get_vr(op.ra), 6).value; + _new = extract(get_vr(op.ra), 6).eval(m_ir); } else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x03) { vtype = get_type(); - _new = extract(get_vr(op.ra), 12).value; + _new = extract(get_vr(op.ra), 12).eval(m_ir); } if (vtype && _new)