diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index fe17dba309..b834fe8aed 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -43,6 +43,18 @@ enum class i4 : char { }; +template +concept LLVMType = (std::is_pointer_v) && (std::is_base_of_v>); + +template +concept LLVMValue = (std::is_pointer_v) && (std::is_base_of_v>); + +template +concept DSLValue = requires (T& v) +{ + { v.eval(std::declval*>()) } -> LLVMValue; +}; + template struct llvm_value_t { @@ -2948,7 +2960,7 @@ public: } // Call external function: provide name and function pointer - template + template llvm::CallInst* call(std::string_view lame, RT(*_func)(FArgs...), Args... args) { static_assert(sizeof...(FArgs) == sizeof...(Args), "spu_llvm_recompiler::call(): unexpected arg number"); @@ -2966,6 +2978,22 @@ public: return inst; } + template requires (sizeof...(Args) != 0) + auto call(std::string_view name, RT(*_func)(FArgs...), Args&&... args) + { + llvm_value_t r; + r.value = call(name, _func, std::forward(args).eval(m_ir)...); + return r; + } + + template + auto call(llvm::Function* func, Args&&... args) + { + llvm_value_t r; + r.value = m_ir->CreateCall(func, {std::forward(args).eval(m_ir)...}); + return r; + } + // Bitcast with immediate constant folding llvm::Value* bitcast(llvm::Value* val, llvm::Type* type) const; @@ -3657,10 +3685,22 @@ struct fmt_unveil #endif template <> -inline llvm::Type* cpu_translator::get_type<__m128i>() +struct llvm_value_t<__m128> : llvm_value_t { - return llvm::VectorType::get(llvm::Type::getInt8Ty(m_context), 16, false); -} + +}; + +template <> +struct llvm_value_t<__m128d> : llvm_value_t +{ + +}; + +template <> +struct llvm_value_t<__m128i> : llvm_value_t +{ + +}; #ifndef _MSC_VER #pragma GCC diagnostic pop diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 234e04d17c..4c2f139fd8 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -523,31 +523,6 @@ Value* PPUTranslator::Broadcast(Value* value, u32 count) return m_ir->CreateVectorSplat(count, value); } -std::pair PPUTranslator::Saturate(Value* value, CmpInst::Predicate inst, Value* extreme) -{ - // Modify args - if (auto v = dyn_cast(value->getType()); v && !extreme->getType()->isVectorTy()) - extreme = Broadcast(extreme, v->getNumElements()); - if (auto e = dyn_cast(extreme->getType()); e && !value->getType()->isVectorTy()) - value = Broadcast(value, e->getNumElements()); - - // Compare args - const auto cmp = m_ir->CreateICmp(inst, value, extreme); - - // Return saturated result and saturation bitmask - return{m_ir->CreateSelect(cmp, extreme, value), cmp}; -} - -std::pair PPUTranslator::SaturateSigned(Value* value, u64 min, u64 max) -{ - const auto type = value->getType()->getScalarType(); - const auto sat_l = Saturate(value, ICmpInst::ICMP_SLT, ConstantInt::get(type, min, true)); - const auto sat_h = Saturate(sat_l.first, ICmpInst::ICMP_SGT, ConstantInt::get(type, max, true)); - - // Return saturated result and saturation bitmask - return{sat_h.first, m_ir->CreateOr(sat_l.second, sat_h.second)}; -} - Value* PPUTranslator::Shuffle(Value* left, Value* right, std::initializer_list indices) { const auto type = left->getType(); @@ -947,14 +922,18 @@ void PPUTranslator::VCTUXS(ppu_opcode_t op) SetSat(IsNotZero(eval(sat_l | sat_h).value)); } +extern __m128 sse_exp2_ps(__m128); + void PPUTranslator::VEXPTEFP(ppu_opcode_t op) { - SetVr(op.vd, Call(GetType(), m_pure_attr, "__vexptefp", GetVr(op.vb, VrType::vf))); + set_vr(op.vd, call("__vexptefp", &sse_exp2_ps, get_vr(op.vb))); } +extern __m128 sse_log2_ps(__m128); + void PPUTranslator::VLOGEFP(ppu_opcode_t op) { - SetVr(op.vd, Call(GetType(), m_pure_attr, "__vlogefp", GetVr(op.vb, VrType::vf))); + set_vr(op.vd, call("__vlogefp", &sse_log2_ps, get_vr(op.vb))); } void PPUTranslator::VMADDFP(ppu_opcode_t op) @@ -1407,28 +1386,27 @@ void PPUTranslator::VPKUWUS(ppu_opcode_t op) void PPUTranslator::VREFP(ppu_opcode_t op) { - const auto result = VecHandleResult(m_ir->CreateFDiv(ConstantVector::getSplat({4, false}, ConstantFP::get(GetType(), 1.0)), GetVr(op.vb, VrType::vf))); - SetVr(op.vd, result); + set_vr(op.vd, vec_handle_result(fsplat(1.0) / get_vr(op.vb))); } void PPUTranslator::VRFIM(ppu_opcode_t op) { - SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf)))); + set_vr(op.vd, vec_handle_result(call(get_intrinsic(Intrinsic::floor), get_vr(op.vb)))); } void PPUTranslator::VRFIN(ppu_opcode_t op) { - SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf)))); + set_vr(op.vd, vec_handle_result(call(get_intrinsic(Intrinsic::nearbyint), get_vr(op.vb)))); } void PPUTranslator::VRFIP(ppu_opcode_t op) { - SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf)))); + set_vr(op.vd, vec_handle_result(call(get_intrinsic(Intrinsic::ceil), get_vr(op.vb)))); } void PPUTranslator::VRFIZ(ppu_opcode_t op) { - SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf)))); + set_vr(op.vd, vec_handle_result(call(get_intrinsic(Intrinsic::trunc), get_vr(op.vb)))); } void PPUTranslator::VRLB(ppu_opcode_t op) @@ -1451,9 +1429,7 @@ void PPUTranslator::VRLW(ppu_opcode_t op) void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) { - const auto result = m_ir->CreateFDiv(ConstantVector::getSplat({4, false}, ConstantFP::get(GetType(), 1.0)), Call(GetType(), "llvm.sqrt.v4f32", GetVr(op.vb, VrType::vf))); - - SetVr(op.vd, VecHandleResult(result)); + set_vr(op.vd, vec_handle_result(fsplat(1.0) / call(get_intrinsic(Intrinsic::sqrt), get_vr(op.vb)))); } void PPUTranslator::VSEL(ppu_opcode_t op) @@ -1520,159 +1496,162 @@ void PPUTranslator::VSEL(ppu_opcode_t op) void PPUTranslator::VSL(ppu_opcode_t op) { // TODO (very rare) - SetVr(op.vd, m_ir->CreateShl(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 7))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a << (b & 7)); } void PPUTranslator::VSLB(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 7))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a << (b & 7)); } void PPUTranslator::VSLDOI(ppu_opcode_t op) { if (op.vsh == 0) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, ab[0]); + set_vr(op.vd, get_vr(op.va)); } else if ((op.vsh % 4) == 0) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto s = op.vsh / 4; - SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3 })); + const auto x = 7; + set_vr(op.vd, shuffle2(b, a, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x)); } else if ((op.vsh % 2) == 0) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto s = op.vsh / 2; - SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7 })); + const auto x = 15; + set_vr(op.vd, shuffle2(b, a, (s + 7) ^ x, (s + 6) ^ x, (s + 5) ^ x, (s + 4) ^ x, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x)); } else { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto s = op.vsh; - SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, s + 8, s + 9, s + 10, s + 11, s + 12, s + 13, s + 14, s + 15 })); + const auto x = 31; + set_vr(op.vd, shuffle2(b, a, (s + 15) ^ x, (s + 14) ^ x, (s + 13) ^ x, (s + 12) ^ x, (s + 11) ^ x, (s + 10) ^ x, (s + 9) ^ x, (s + 8) ^ x, (s + 7) ^ x, (s + 6) ^ x, (s + 5) ^ x, (s + 4) ^ x, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x)); } } void PPUTranslator::VSLH(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 15))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a << (b & 15)); } void PPUTranslator::VSLO(ppu_opcode_t op) { // TODO (rare) - SetVr(op.vd, m_ir->CreateShl(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 0x78))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a << (b & 0x78)); } void PPUTranslator::VSLW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 31))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a << (b & 31)); } void PPUTranslator::VSPLTB(ppu_opcode_t op) { - const u32 ui = op.vuimm & 0xf; - SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui })); + const u32 ui = ~op.vuimm & 0xf; + set_vr(op.vd, zshuffle(get_vr(op.vb), ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui)); } void PPUTranslator::VSPLTH(ppu_opcode_t op) { - const u32 ui = op.vuimm & 0x7; - SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { ui, ui, ui, ui, ui, ui, ui, ui })); + const u32 ui = ~op.vuimm & 0x7; + set_vr(op.vd, zshuffle(get_vr(op.vb), ui, ui, ui, ui, ui, ui, ui, ui)); } void PPUTranslator::VSPLTISB(ppu_opcode_t op) { - SetVr(op.vd, ConstantVector::getSplat({16, false}, m_ir->getInt8(op.vsimm))); + set_vr(op.vd, splat(op.vsimm)); } void PPUTranslator::VSPLTISH(ppu_opcode_t op) { - SetVr(op.vd, ConstantVector::getSplat({8, false}, m_ir->getInt16(op.vsimm))); + set_vr(op.vd, splat(op.vsimm)); } void PPUTranslator::VSPLTISW(ppu_opcode_t op) { - SetVr(op.vd, ConstantVector::getSplat({4, false}, m_ir->getInt32(op.vsimm))); + set_vr(op.vd, splat(op.vsimm)); } void PPUTranslator::VSPLTW(ppu_opcode_t op) { - const u32 ui = op.vuimm & 0x3; - SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi32), nullptr, { ui, ui, ui, ui })); + const u32 ui = ~op.vuimm & 0x3; + set_vr(op.vd, zshuffle(get_vr(op.vb), ui, ui, ui, ui)); } void PPUTranslator::VSR(ppu_opcode_t op) { // TODO (very rare) - SetVr(op.vd, m_ir->CreateLShr(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 7))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 7)); } void PPUTranslator::VSRAB(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 7))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 7)); } void PPUTranslator::VSRAH(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 15))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 15)); } void PPUTranslator::VSRAW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 31))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 31)); } void PPUTranslator::VSRB(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 7))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 7)); } void PPUTranslator::VSRH(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 15))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 15)); } void PPUTranslator::VSRO(ppu_opcode_t op) { // TODO (very rare) - SetVr(op.vd, m_ir->CreateLShr(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 0x78))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 0x78)); } void PPUTranslator::VSRW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 31))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a >> (b & 31)); } void PPUTranslator::VSUBCUW(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); set_vr(op.vd, zext(a >= b)); } void PPUTranslator::VSUBFP(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); - SetVr(op.vd, VecHandleResult(eval(a - b).eval(m_ir))); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, vec_handle_result(a - b)); } void PPUTranslator::VSUBSBS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1680,8 +1659,7 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op) void PPUTranslator::VSUBSHS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1689,8 +1667,7 @@ void PPUTranslator::VSUBSHS(ppu_opcode_t op) void PPUTranslator::VSUBSWS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1698,15 +1675,13 @@ void PPUTranslator::VSUBSWS(ppu_opcode_t op) void PPUTranslator::VSUBUBM(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUBS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1714,15 +1689,13 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op) void PPUTranslator::VSUBUHM(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUHS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1730,15 +1703,13 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op) void PPUTranslator::VSUBUWM(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUWS(ppu_opcode_t op) { - const auto a = get_vr(op.va); - const auto b = get_vr(op.vb); + const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); SetSat(IsNotZero(eval(r != (a - b)).value)); @@ -1746,102 +1717,109 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op) void PPUTranslator::VSUMSWS(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - const auto a = SExt(ab[0]); - const auto b = SExt(m_ir->CreateExtractElement(ab[1], m_ir->getInt32(m_is_be ? 3 : 0))); - const auto e0 = m_ir->CreateExtractElement(a, m_ir->getInt32(0)); - const auto e1 = m_ir->CreateExtractElement(a, m_ir->getInt32(1)); - const auto e2 = m_ir->CreateExtractElement(a, m_ir->getInt32(2)); - const auto e3 = m_ir->CreateExtractElement(a, m_ir->getInt32(3)); - const auto saturated = SaturateSigned(Add({ b, e0, e1, e2, e3 }), -0x80000000ll, 0x7fffffff); - SetVr(op.vd, ZExt(m_ir->CreateAnd(saturated.first, 0xffffffff))); - SetSat(saturated.second); + const auto [a, b] = get_vrs(op.va, op.vb); + const auto x = sext(zshuffle(a, 0, 1)); + const auto y = sext(zshuffle(a, 2, 3)); + const auto z = sext(zshuffle(b, 0, 4)); + const auto s = eval(x + y + z); + const auto r = min(max(zshuffle(s, 0, 2) + zshuffle(s, 1, 2), splat(-0x8000'0000ll)), splat(0x7fff'ffff)); + set_vr(op.vd, zshuffle(bitcast(r), 0, 4, 4, 4)); + SetSat(IsNotZero(eval((r + 0x8000'0000) >> 32).value)); } void PPUTranslator::VSUM2SWS(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - const auto b = SExt(Shuffle(ab[1], nullptr, { 1, 3 })); - const auto a = SExt(ab[0]); - const auto e0 = Shuffle(a, nullptr, { 0, 2 }); - const auto e1 = Shuffle(a, nullptr, { 1, 3 }); - const auto saturated = SaturateSigned(Add({ b, e0, e1 }), -0x80000000ll, 0x7fffffff); - SetVr(op.vd, m_ir->CreateAnd(saturated.first, 0xffffffff)); - SetSat(IsNotZero(saturated.second)); + const auto [a, b] = get_vrs(op.va, op.vb); + const auto x = a << 32 >> 32; + const auto y = a >> 32; + const auto z = b >> 32; + const auto r = min(max(x + y + z, splat(-0x8000'0000ll)), splat(0x7fff'ffff)); + set_vr(op.vd, zshuffle(bitcast(r), 0, 4, 2, 4)); + SetSat(IsNotZero(eval((r + 0x8000'0000) >> 32).value)); } void PPUTranslator::VSUM4SBS(ppu_opcode_t op) { - const auto a = SExt(GetVr(op.va, VrType::vi8), GetType()); - const auto b = GetVr(op.vb, VrType::vi32); - const auto e0 = Shuffle(a, nullptr, { 0, 4, 8, 12 }); - const auto e1 = Shuffle(a, nullptr, { 1, 5, 9, 13 }); - const auto e2 = Shuffle(a, nullptr, { 2, 6, 10, 14 }); - const auto e3 = Shuffle(a, nullptr, { 3, 7, 11, 15 }); - const auto result = m_ir->CreateAdd(SExt(b), SExt(Add({ e0, e1, e2, e3 }))); // Summ, (e0+e1+e2+e3) is small - const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto x = a << 24 >> 24; + const auto y = a << 16 >> 24; + const auto z = a << 8 >> 24; + const auto w = a >> 24; + const auto s = eval(x + y + z + w); // Can't overflow + const auto r = add_sat(s, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (s + b)).value)); } void PPUTranslator::VSUM4SHS(ppu_opcode_t op) { - const auto a = SExt(GetVr(op.va, VrType::vi16)); - const auto b = GetVr(op.vb, VrType::vi32); - const auto e0 = Shuffle(a, nullptr, { 0, 2, 4, 6 }); - const auto e1 = Shuffle(a, nullptr, { 1, 3, 5, 7 }); - const auto result = m_ir->CreateAdd(SExt(b), SExt(Add({ e0, e1 }))); // Summ, (e0+e1) is small - const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto x = a << 16 >> 16; + const auto y = a >> 16; + const auto s = eval(x + y); // Can't overflow + const auto r = add_sat(s, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (s + b)).value)); } void PPUTranslator::VSUM4UBS(ppu_opcode_t op) { - const auto a = ZExt(GetVr(op.va, VrType::vi8), GetType()); - const auto b = GetVr(op.vb, VrType::vi32); - const auto e0 = Shuffle(a, nullptr, { 0, 4, 8, 12 }); - const auto e1 = Shuffle(a, nullptr, { 1, 5, 9, 13 }); - const auto e2 = Shuffle(a, nullptr, { 2, 6, 10, 14 }); - const auto e3 = Shuffle(a, nullptr, { 3, 7, 11, 15 }); - const auto r = Add({ b, e0, e1, e2, e3 }); // Summ, (e0+e1+e2+e3) is small - const auto s = m_ir->CreateICmpULT(r, b); // Carry (saturation) - SetVr(op.vd, m_ir->CreateSelect(s, ConstantVector::getSplat({4, false}, m_ir->getInt32(0xffffffff)), r)); - SetSat(IsNotZero(s)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto x = a & 0xff; + const auto y = a << 16 >> 24; + const auto z = a << 8 >> 24; + const auto w = a >> 24; + const auto s = eval(x + y + z + w); // Can't overflow + const auto r = add_sat(s, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (s + b)).value)); } -#define UNPACK_PIXEL_OP(px) m_ir->CreateOr(m_ir->CreateAnd(px, 0xff00001f), m_ir->CreateOr(m_ir->CreateAnd(m_ir->CreateShl(px, 6), 0x1f0000), m_ir->CreateAnd(m_ir->CreateShl(px, 3), 0x1f00))) +#define UNPACK_PIXEL_OP(px) (px & 0xff00001f) | ((px << 6) & 0x1f0000) | ((px << 3) & 0x1f00) void PPUTranslator::VUPKHPX(ppu_opcode_t op) { - const auto px = SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 0, 1, 2, 3 })); - SetVr(op.vd, UNPACK_PIXEL_OP(px)); + // Caution: potentially out-of-lane algorithm + const auto px = sext(zshuffle(get_vr(op.vb), 4, 5, 6, 7)); + set_vr(op.vd, UNPACK_PIXEL_OP(px)); } void PPUTranslator::VUPKHSB(ppu_opcode_t op) { - SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { 0, 1, 2, 3, 4, 5, 6, 7 }))); + // Caution: potentially out-of-lane algorithm + const auto r = sext(zshuffle(get_vr(op.vb), 8, 9, 10, 11, 12, 13, 14, 15)); + set_vr(op.vd, r); } void PPUTranslator::VUPKHSH(ppu_opcode_t op) { - SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 0, 1, 2, 3 }))); + // Caution: potentially out-of-lane algorithm + const auto r = sext(zshuffle(get_vr(op.vb), 4, 5, 6, 7)); + set_vr(op.vd, r); } void PPUTranslator::VUPKLPX(ppu_opcode_t op) { - const auto px = SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 4, 5, 6, 7 })); - SetVr(op.vd, UNPACK_PIXEL_OP(px)); + // Caution: potentially out-of-lane algorithm + const auto px = sext(zshuffle(get_vr(op.vb), 0, 1, 2, 3)); + set_vr(op.vd, UNPACK_PIXEL_OP(px)); } void PPUTranslator::VUPKLSB(ppu_opcode_t op) { - SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { 8, 9, 10, 11, 12, 13, 14, 15 }))); + // Caution: potentially out-of-lane algorithm + const auto r = sext(zshuffle(get_vr(op.vb), 0, 1, 2, 3, 4, 5, 6, 7)); + set_vr(op.vd, r); } void PPUTranslator::VUPKLSH(ppu_opcode_t op) { - SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 4, 5, 6, 7 }))); + // Caution: potentially out-of-lane algorithm + const auto r = sext(zshuffle(get_vr(op.vb), 0, 1, 2, 3)); + set_vr(op.vd, r); } void PPUTranslator::VXOR(ppu_opcode_t op) @@ -1849,12 +1827,12 @@ void PPUTranslator::VXOR(ppu_opcode_t op) if (op.va == op.vb) { // Assign zero, break dependencies - SetVr(op.vd, ConstantAggregateZero::get(GetType())); + set_vr(op.vd, splat(0)); return; } - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateXor(ab[0], ab[1])); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, a ^ b); } void PPUTranslator::TDI(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 8a0ba77afc..f972120b31 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -173,14 +173,6 @@ public: // Load vr llvm::Value* GetVr(u32 vr, VrType); - // Load VRs - template - std::array GetVrs(VrType type, Vrs... regs) - { - static_assert(sizeof...(Vrs), "Empty VR list"); - return{ GetVr(regs, type)... }; - } - // Set vr to the specified value void SetVr(u32 vr, llvm::Value*); @@ -196,12 +188,6 @@ public: // Broadcast specified value llvm::Value* Broadcast(llvm::Value* value, u32 count); - // Saturate scalar or vector given the comparison operand and the extreme value to compare with (second result is the comparison result) - std::pair Saturate(llvm::Value* value, llvm::CmpInst::Predicate inst, llvm::Value* extreme); - - // Saturate signed value (second result is the disjunction of comparison results) - std::pair SaturateSigned(llvm::Value* value, u64 min, u64 max); - // Create shuffle instruction with constant args llvm::Value* Shuffle(llvm::Value* left, llvm::Value* right, std::initializer_list indices);