# Patch notes (text before the first "diff --git" header is commentary, not patch payload).
#
# What the patch does:
#   * FragmentProgramDecompiler.h: adds a per-temp-register tracker (temp_register) that
#     records whether the full register rN and/or its half registers h(2N) / h(2N+1)
#     have been written, and whether the most recent write was a half-register write.
#   * FragmentProgramDecompiler.cpp:
#       - SetDst() tags the destination register after every write.
#       - GetSRC(), for a non-fp16 temp source read by an unpack opcode
#         (UP16 / UP2 / UP4 / UPB / UPG), emits a "gather" statement when the halves
#         backing the swizzled components were written last, repacking h(2N)/h(2N+1)
#         into rN before the read.
#       - BuildCode() emits the two "gather" helper overloads into the generated shader.
#   * D3D12CommonDecompiler.cpp: the packHalf2x16 / unpackHalf2x16 emulation scale changes
#     from 6.1E+5 to 65504. (65504 is the largest finite half-float value.)
#
# Review notes:
#   * "auto &reg" in GetSRC had been mangled to a registered-trademark sign by HTML entity
#     decoding; it is restored here (the following line uses "reg.").
#   * Line breaks and indentation were reconstructed from a whitespace-collapsed copy.
#     Context whitespace may not match the tree byte-for-byte (try
#     "git apply --ignore-whitespace"), and leading whitespace inside the emitted
#     shader string literals may have been collapsed to a single space.
#   * Template argument lists appear to have been stripped by the same processing:
#     "template std::string ...GetSRC(T src)", "std::vector m_end_offsets",
#     "std::vector m_else_offsets" and "std::array temp_registers". They are left as found
#     because the element types / array size are not visible here - restore them from the
#     upstream commit before applying.
#   * requires_split() is added but not called anywhere in this patch.
#   * SetDst() indexes temp_registers with dst.dest_reg (or dest_reg >> 1) without a
#     bounds check; confirm the array size covers the full dest_reg range.
#
diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
index c00146f815..819d040eb1 100644
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
@@ -76,6 +76,9 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
 	{
 		AddCode(m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";");
 	}
+
+	u32 reg_index = dst.fp16 ? dst.dest_reg >> 1 : dst.dest_reg;
+	temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16);
 }
 
 void FragmentProgramDecompiler::AddFlowOp(std::string code)
@@ -339,6 +342,30 @@ template std::string FragmentProgramDecompiler::GetSRC(T src)
 	switch (src.reg_type)
 	{
 	case RSX_FP_REGISTER_TYPE_TEMP:
+
+		if (!src.fp16)
+		{
+			if (dst.opcode == RSX_FP_OPCODE_UP16 ||
+				dst.opcode == RSX_FP_OPCODE_UP2 ||
+				dst.opcode == RSX_FP_OPCODE_UP4 ||
+				dst.opcode == RSX_FP_OPCODE_UPB ||
+				dst.opcode == RSX_FP_OPCODE_UPG)
+			{
+				//TODO: Implement aliased gather for half floats
+				bool xy_read = false;
+				bool zw_read = false;
+
+				if (src.swizzle_x < 2 || src.swizzle_y < 2 || src.swizzle_z < 2 || src.swizzle_w < 2)
+					xy_read = true;
+				if (src.swizzle_x > 1 || src.swizzle_y > 1 || src.swizzle_z > 1 || src.swizzle_w > 1)
+					zw_read = true;
+
+				auto &reg = temp_registers[src.tmp_reg_index];
+				if (reg.requires_gather(xy_read, zw_read))
+					AddCode(reg.gather_r());
+			}
+		}
+
 		ret += AddReg(src.tmp_reg_index, src.fp16);
 		break;
 
@@ -424,6 +451,27 @@ std::string FragmentProgramDecompiler::BuildCode()
 	OS << std::endl;
 	insertOutputs(OS);
 	OS << std::endl;
+
+	//TODO: Better organization for this
+	std::string float2 = getFloatTypeName(2);
+	std::string float4 = getFloatTypeName(4);
+
+	OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
+	OS << "{\n";
+	OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
+	OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
+	OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
+	OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
+	OS << " return " << float4 << "(x, y, z, w);\n";
+	OS << "}\n\n";
+
+	OS << float2 << " gather(" << float4 << " _h)\n";
+	OS << "{\n";
+	OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
+	OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
+	OS << " return " << float2 << "(x, y);\n";
+	OS << "}\n\n";
+
 	insertMainStart(OS);
 	OS << main << std::endl;
 	insertMainEnd(OS);
diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
index b013cf5d1b..f2d502aa60 100644
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
@@ -19,6 +19,85 @@
  */
 class FragmentProgramDecompiler
 {
+	struct temp_register
+	{
+		bool aliased_r0 = false;
+		bool aliased_h0 = false;
+		bool aliased_h1 = false;
+		bool last_write_half = false;
+
+		u32 real_index = UINT32_MAX;
+
+		void tag(u32 index, bool half_register)
+		{
+			if (half_register)
+			{
+				last_write_half = true;
+
+				if (index & 1)
+					aliased_h1 = true;
+				else
+					aliased_h0 = true;
+			}
+			else
+			{
+				aliased_r0 = true;
+				last_write_half = false;
+			}
+
+			if (real_index == UINT32_MAX)
+			{
+				if (half_register)
+					real_index = index >> 1;
+				else
+					real_index = index;
+			}
+		}
+
+		bool requires_gather(bool xy, bool zw) const
+		{
+			//Data fetched from the single precision register requires merging of the two half registers
+			//TODO: Check individual swizzle channels
+			if (aliased_h0 && xy || aliased_h1 && zw)
+				return last_write_half;
+
+			return false;
+		}
+
+		bool requires_split(u32 /*index*/) const
+		{
+			//Data fetched from any of the two half registers requires sync with the full register
+			if (!last_write_half && aliased_r0)
+			{
+				//r0 has been written to
+				//TODO: Check for specific elements in real32 register
+				return true;
+			}
+
+			return false;
+		}
+
+		std::string gather_r()
+		{
+			std::string h0 = "h" + std::to_string(real_index << 1);
+			std::string h1 = "h" + std::to_string(real_index << 1 | 1);
+			std::string reg = "r" + std::to_string(real_index);
+			std::string ret = "//Invalid gather";
+
+			if (aliased_h0 && aliased_h1)
+				ret = reg + " = gather(" + h0 + ", " + h1 + ");";
+			else if (aliased_h0)
+				ret = reg + ".xy = gather(" + h0 + ");";
+			else if (aliased_h1)
+				ret = reg + ".zw = gather(" + h1 + ");";
+
+			last_write_half = false;
+			aliased_h0 = false;
+			aliased_h1 = false;
+			return ret;
+		}
+	};
+
 	OPDEST dst;
 	SRC0 src0;
 	SRC1 src1;
@@ -35,6 +114,8 @@ class FragmentProgramDecompiler
 	std::vector m_end_offsets;
 	std::vector m_else_offsets;
 
+	std::array temp_registers;
+
 	std::string GetMask();
 
 	void SetDst(std::string code, bool append_mask = true);
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp
index c59fdac051..d844cfbaa0 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp
@@ -184,12 +184,12 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
 	**/
 	OS << "uint packHalf2x16(float2 val)";
 	OS << "{\n";
-	OS << " return packSnorm2x16(val / 6.1E+5);\n";
+	OS << " return packSnorm2x16(val / 65504.);\n";
 	OS << "}\n\n";
 
 	OS << "float2 unpackHalf2x16(uint val)";
 	OS << "{\n";
-	OS << " return unpackSnorm2x16(val) * 6.1E+5;\n";
+	OS << " return unpackSnorm2x16(val) * 65504.;\n";
 	OS << "}\n\n";
 
 	OS << "float read_value(float4 src, uint remap_index)\n";