rsx/fp: Implement register gather (only for UP(X) instructions)

- Workaround for temp register aliasing between H and R variants
- TODO: Implement temp regs as 128-bit blocks with r/w as pack/unpack
This commit is contained in:
kd-11 2017-11-30 21:47:25 +03:00
parent 44e34064de
commit fe9090bd39
3 changed files with 131 additions and 2 deletions

View File

@ -76,6 +76,9 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
{
AddCode(m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";");
}
u32 reg_index = dst.fp16 ? dst.dest_reg >> 1 : dst.dest_reg;
temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16);
}
void FragmentProgramDecompiler::AddFlowOp(std::string code)
@ -339,6 +342,30 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
switch (src.reg_type)
{
case RSX_FP_REGISTER_TYPE_TEMP:
if (!src.fp16)
{
if (dst.opcode == RSX_FP_OPCODE_UP16 ||
dst.opcode == RSX_FP_OPCODE_UP2 ||
dst.opcode == RSX_FP_OPCODE_UP4 ||
dst.opcode == RSX_FP_OPCODE_UPB ||
dst.opcode == RSX_FP_OPCODE_UPG)
{
//TODO: Implement aliased gather for half floats
bool xy_read = false;
bool zw_read = false;
if (src.swizzle_x < 2 || src.swizzle_y < 2 || src.swizzle_z < 2 || src.swizzle_w < 2)
xy_read = true;
if (src.swizzle_x > 1 || src.swizzle_y > 1 || src.swizzle_z > 1 || src.swizzle_w > 1)
zw_read = true;
auto &reg = temp_registers[src.tmp_reg_index];
if (reg.requires_gather(xy_read, zw_read))
AddCode(reg.gather_r());
}
}
ret += AddReg(src.tmp_reg_index, src.fp16);
break;
@ -424,6 +451,27 @@ std::string FragmentProgramDecompiler::BuildCode()
OS << std::endl;
insertOutputs(OS);
OS << std::endl;
//TODO: Better organization for this
std::string float2 = getFloatTypeName(2);
std::string float4 = getFloatTypeName(4);
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
OS << " return " << float4 << "(x, y, z, w);\n";
OS << "}\n\n";
OS << float2 << " gather(" << float4 << " _h)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
OS << " return " << float2 << "(x, y);\n";
OS << "}\n\n";
insertMainStart(OS);
OS << main << std::endl;
insertMainEnd(OS);

View File

@ -19,6 +19,85 @@
*/
class FragmentProgramDecompiler
{
// Bookkeeping for one full-width temp register (rN) and the two half-width
// registers (h(2N), h(2N+1)) that share its slot. It remembers which views
// have been written and whether the most recent write was a half-width one,
// so that a later read can tell whether the views need to be reconciled first.
struct temp_register
{
	bool aliased_r0 = false;      // the full-width view has been written
	bool aliased_h0 = false;      // the even-indexed half view has been written
	bool aliased_h1 = false;      // the odd-indexed half view has been written
	bool last_write_half = false; // the latest tagged write was half-width

	// Index of the full-width register; UINT32_MAX until the first tag() call
	u32 real_index = UINT32_MAX;

	// Records a write to this slot.
	// index         - register number as encoded by the writer (a half register
	//                 number when half_register is set, a full one otherwise)
	// half_register - true when the write went through a half-width view
	void tag(u32 index, bool half_register)
	{
		last_write_half = half_register;

		if (!half_register)
			aliased_r0 = true;
		else if ((index & 1) != 0)
			aliased_h1 = true;
		else
			aliased_h0 = true;

		// The owning index is latched on the first write only
		if (real_index != UINT32_MAX)
			return;

		real_index = half_register ? (index >> 1) : index;
	}

	// Tells whether a full-width read touching the xy and/or zw lanes has to
	// merge the half registers back in first.
	// TODO: Check individual swizzle channels
	bool requires_gather(bool xy, bool zw) const
	{
		const bool xy_from_half = aliased_h0 && xy;
		const bool zw_from_half = aliased_h1 && zw;

		// Only relevant while the newest data lives in a half-width view
		return (xy_from_half || zw_from_half) && last_write_half;
	}

	// Tells whether a half-width read has to be synced from the full-width
	// register, i.e. the full view was written and no half write came after it.
	// TODO: Check for specific elements in real32 register
	bool requires_split(u32 /*index*/) const
	{
		return !last_write_half && aliased_r0;
	}

	// Builds the shader statement that refreshes the full-width register from
	// whichever half views were written, then clears the half-write state.
	// Yields a comment line when neither half view is flagged.
	std::string gather_r()
	{
		const u32 even_half = real_index << 1;
		const std::string full_name = "r" + std::to_string(real_index);
		const std::string low_name = "h" + std::to_string(even_half);
		const std::string high_name = "h" + std::to_string(even_half | 1);

		std::string statement;
		if (aliased_h0 && aliased_h1)
		{
			statement = full_name + " = gather(" + low_name + ", " + high_name + ");";
		}
		else if (aliased_h0)
		{
			statement = full_name + ".xy = gather(" + low_name + ");";
		}
		else if (aliased_h1)
		{
			statement = full_name + ".zw = gather(" + high_name + ");";
		}
		else
		{
			statement = "//Invalid gather";
		}

		// The half views are now considered merged
		aliased_h1 = false;
		aliased_h0 = false;
		last_write_half = false;

		return statement;
	}
};
OPDEST dst;
SRC0 src0;
SRC1 src1;
@ -35,6 +114,8 @@ class FragmentProgramDecompiler
std::vector<u32> m_end_offsets;
std::vector<u32> m_else_offsets;
std::array<temp_register, 24> temp_registers;
std::string GetMask();
void SetDst(std::string code, bool append_mask = true);

View File

@ -184,12 +184,12 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
**/
OS << "uint packHalf2x16(float2 val)";
OS << "{\n";
OS << " return packSnorm2x16(val / 6.1E+5);\n";
OS << " return packSnorm2x16(val / 65504.);\n";
OS << "}\n\n";
OS << "float2 unpackHalf2x16(uint val)";
OS << "{\n";
OS << " return unpackSnorm2x16(val) * 6.1E+5;\n";
OS << " return unpackSnorm2x16(val) * 65504.;\n";
OS << "}\n\n";
OS << "float read_value(float4 src, uint remap_index)\n";