diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 0224675dc5..56612b8279 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3741,7 +3741,7 @@ public: const auto imm32 = immediate.eval(m_ir); const auto immediate2 = (llvm_const_int{e}); const auto imm8 = immediate2.eval(m_ir); - result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8});\ + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8}); return result; } diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 38816ed917..5c385c2238 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -124,7 +124,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Global LUTs llvm::GlobalVariable* m_spu_frest_fraction_lut{}; - llvm::GlobalVariable* m_spu_frest_exponent_lut{}; llvm::GlobalVariable* m_spu_frsqest_fraction_lut{}; llvm::GlobalVariable* m_spu_frsqest_exponent_lut{}; @@ -1507,7 +1506,6 @@ public: { // LUTs for some instructions m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut)); - m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut)); m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut)); m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut)); } @@ -6040,23 +6038,22 @@ public: const auto a = bitcast(value(ci->getOperand(0))); const auto a_fraction = (a >> splat(18)) & splat(0x1F); - const auto a_exponent = (a >> splat(23)) & splat(0xFF); + const auto a_exponent = (a & splat(0x7F800000u)); + const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent)); + const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u)); const auto a_sign = (a & splat(0x80000000)); value_t final_result = eval(splat(0)); for (u32 i = 0; i < 4; i++) { const auto eval_fraction = eval(extract(a_fraction, i)); - const auto eval_exponent = eval(extract(a_exponent, i)); - const auto eval_sign = eval(extract(a_sign, i)); value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction); - value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent); - final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent))); + final_result = eval(insert(final_result, i, r_fraction)); } - return bitcast(final_result); + return bitcast(bitcast(final_result | bitcast(fix_exponent) | a_sign)); }); set_vr(op.rt, frest(get_vr(op.ra))); @@ -6713,26 +6710,26 @@ public: const auto div_result = the_one / div; - return vfixupimmps(div_result, div_result, splat(0x00330088u), 0, 0xff); + return vfixupimmps(bitcast(splat(0xFFFFFFFFu)), div_result, splat(0x11001188u), 0, 0xff); }); } else { register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) { - const auto div = value(ci->getOperand(0)); - const auto the_one = value(ci->getOperand(1)); + const auto div = value(ci->getOperand(0)); + const auto the_one = value(ci->getOperand(1)); - const auto div_result = the_one / div; + const auto div_result = the_one / div; - // from ps3 hardware testing: Inf => NaN and NaN => Zero - const auto result_and = bitcast(div_result) & 0x7fffffffu; - const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); - const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); + // from ps3 hardware testing: Inf => NaN and NaN => Zero + const auto result_and = bitcast(div_result) & 0x7fffffffu; + const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); + const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); - const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); - const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); - return bitcast((bitcast(div_result) & and_mask) | or_mask); + const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); + const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); + return bitcast((bitcast(div_result) & and_mask) | or_mask); }); } @@ -7019,20 +7016,23 @@ public: { const auto a = bitcast(value(ci->getOperand(0))); const auto a_fraction = (a >> splat(18)) & splat(0x1F); - const auto a_exponent = (a >> splat(23)) & splat(0xFF); + const auto a_exponent = (a & splat(0x7F800000u)); + const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent)); + const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u)); const auto a_sign = (a & splat(0x80000000)); value_t b = eval(splat(0)); for (u32 i = 0; i < 4; i++) { const auto eval_fraction = eval(extract(a_fraction, i)); - const auto eval_exponent = eval(extract(a_exponent, i)); - const auto eval_sign = eval(extract(a_sign, i)); + value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction); - value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent); - b = eval(insert(b, i, eval(r_fraction | eval_sign | r_exponent))); + + b = eval(insert(b, i, r_fraction)); } + b = eval(b | fix_exponent | a_sign); + const auto base = (b & 0x007ffc00u) << 9; // Base fraction const auto ymul = (b & 0x3ff) * (a & 0x7ffff); // Step fraction * Y fraction (fixed point at 2^-32) const auto comparison = (ymul > base); // Should exponent be adjusted?