From 7bb61b8099197a6de85a13a1479c737886a0d215 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 3 Nov 2014 20:59:08 -0800 Subject: [PATCH] Fixing some shader stuff. What a mess. --- .../gpu/d3d11/d3d11_shader_translator.cc | 227 +++++++++--------- src/xenia/gpu/xenos/ucode.h | 18 +- src/xenia/gpu/xenos/ucode_disassembler.cc | 42 ++-- 3 files changed, 152 insertions(+), 135 deletions(-) diff --git a/src/xenia/gpu/d3d11/d3d11_shader_translator.cc b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc index 213608de9..e69d6a316 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader_translator.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc @@ -167,7 +167,7 @@ int D3D11ShaderTranslator::TranslateVertexShader( // Always write position, as some shaders seem to only write certain values. if (alloc_counts.positions) { append( - " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"); + " o.oPos = float4(0.0, 0.0, 0.0, 1.0);\n"); } if (alloc_counts.point_size) { append( @@ -365,19 +365,28 @@ static const char chan_names[] = { void D3D11ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, - uint32_t abs) { + uint32_t abs_constants) { if (negate) { append("-"); } - if (abs) { - append("abs("); - } if (type) { // Register. - append("r%u", num); + if (num & 0x80) { + append("abs("); + } + append("r%u", num & 0x7F); + if (num & 0x80) { + append(")"); + } } else { // Constant. + if (abs_constants) { + append("abs("); + } append("c[%u]", type_ == XE_GPU_SHADER_TYPE_PIXEL ? num + 256 : num); + if (abs_constants) { + append(")"); + } } if (swiz) { append("."); @@ -386,9 +395,6 @@ void D3D11ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, swiz >>= 2; } } - if (abs) { - append(")"); - } } void D3D11ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { @@ -466,14 +472,28 @@ void D3D11ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, void D3D11ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, - uint32_t abs) { + uint32_t abs_constants) { if (negate) { append("-"); } - if (abs) { - append("|"); + if (type) { + if (num & 0x80) { + append("|"); + } + append("R%u", num & 0x7F); + if (num & 0x80) { + append("|"); + } + } else { + if (abs_constants) { + append("|"); + } + num += type_ == XE_GPU_SHADER_TYPE_PIXEL ? 256 : 0; + append("C%u", num); + if (abs_constants) { + append("|"); + } } - append("%c%u", type ? 'R' : 'C', num); if (swiz) { append("."); for (int i = 0; i < 4; i++) { @@ -481,9 +501,6 @@ void D3D11ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, swiz >>= 2; } } - if (abs) { - append("|"); - } } void D3D11ShaderTranslator::PrintDstReg(uint32_t num, uint32_t mask, @@ -528,9 +545,9 @@ int D3D11ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) { append("saturate("); } append("("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(" + "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -547,9 +564,9 @@ int D3D11ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) { append("saturate("); } append("("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(" * "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -568,15 +585,14 @@ int D3D11ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) { if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && alu.src1_swiz == alu.src2_swiz && - alu.src1_reg_negate == alu.src2_reg_negate && - alu.src1_reg_abs == alu.src2_reg_abs) { + alu.src1_reg_negate == alu.src2_reg_negate) { // This is a mov. - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); } else { append("max("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(", "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(")"); } if (alu.vector_clamp) { @@ -594,9 +610,9 @@ int D3D11ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) { append("saturate("); } append("min("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(", "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -613,21 +629,21 @@ int D3D11ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu, const cha append("saturate("); } append("float4(("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").x %s (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").x ? 1.0 : 0.0, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").y %s (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").y ? 1.0 : 0.0, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").z %s (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").z ? 1.0 : 0.0, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").w %s (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").w ? 1.0 : 0.0)"); if (alu.vector_clamp) { append(")"); @@ -656,7 +672,7 @@ int D3D11ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) { append("saturate("); } append("frac("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -673,7 +689,7 @@ int D3D11ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) { append("saturate("); } append("trunc("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -690,7 +706,7 @@ int D3D11ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) { append("saturate("); } append("floor("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -707,11 +723,11 @@ int D3D11ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) { append("saturate("); } append("mad("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(", "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(", "); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -729,29 +745,29 @@ int D3D11ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu, const cha } // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as values. append("float4(("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").x %s 0.0 ? (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").x : ("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(").x, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").y %s 0.0 ? (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").y : ("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(").y, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").z %s 0.0 ? (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").z : ("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(").z, ("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").w %s 0.0 ? (", op); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").w : ("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(").w)"); if (alu.vector_clamp) { append(")"); @@ -777,9 +793,9 @@ int D3D11ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) { append("saturate("); } append("dot("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(", "); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(")"); if (alu.vector_clamp) { append(")"); @@ -796,9 +812,9 @@ int D3D11ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) { append("saturate("); } append("dot(float4("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").xyz, float4("); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").xyz)"); if (alu.vector_clamp) { append(")"); @@ -815,11 +831,11 @@ int D3D11ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) { append("saturate("); } append("dot(float4("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(").xy, float4("); - AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.abs_constants); append(").xy) + "); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".x"); if (alu.vector_clamp) { append(")"); @@ -840,13 +856,13 @@ int D3D11ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) { append("max("); append("max("); append("max("); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(".x, "); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(".y), "); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(".z), "); - AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.abs_constants); append(".w)"); if (alu.vector_clamp) { append(")"); @@ -859,62 +875,62 @@ int D3D11ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) { // ... int D3D11ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); } if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { // This is a mov. - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); } else { append("max("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".x, "); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".y).xxxx"); } if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); } append("min("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".x, "); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".y).xxxx"); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu, const char* op) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); } append("(("); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_SETEs(const instr_alu_t& alu) { @@ -931,24 +947,24 @@ int D3D11ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) { } int D3D11ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); } append("(1.0 / "); - AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.abs_constants); append(")"); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); @@ -958,16 +974,16 @@ int D3D11ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { uint32_t swiz_b = (src3_swiz & 0x3); uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); append("("); - AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c * ", chan_names[swiz_a]); - AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c", chan_names[swiz_b]); append(").xxxx"); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) { @@ -975,7 +991,7 @@ int D3D11ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) { } int D3D11ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); @@ -985,16 +1001,16 @@ int D3D11ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) { uint32_t swiz_b = (src3_swiz & 0x3); uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); append("("); - AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c + ", chan_names[swiz_a]); - AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c", chan_names[swiz_b]); append(").xxxx"); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) { @@ -1002,7 +1018,7 @@ int D3D11ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) { } int D3D11ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) { - AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); append(" = "); if (alu.scalar_clamp) { append("saturate("); @@ -1012,16 +1028,16 @@ int D3D11ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) { uint32_t swiz_b = (src3_swiz & 0x3); uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); append("("); - AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c - ", chan_names[swiz_a]); - AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); append(".%c", chan_names[swiz_b]); append(").xxxx"); if (alu.scalar_clamp) { append(")"); } append(";\n"); - AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, alu.export_data); return 0; } int D3D11ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) { @@ -1157,15 +1173,15 @@ int D3D11ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { append(" = "); if (iv.num_srcs == 3) { PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants); append(", "); } PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, - alu->src1_reg_negate, alu->src1_reg_abs); + alu->src1_reg_negate, alu->abs_constants); if (iv.num_srcs > 1) { append(", "); PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, - alu->src2_reg_negate, alu->src2_reg_abs); + alu->src2_reg_negate, alu->abs_constants); } if (alu->vector_clamp) { append(" CLAMP"); @@ -1198,7 +1214,7 @@ int D3D11ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { } else { append("\t \tOP(%u)\t", alu->scalar_opc); } - PrintDstReg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data); + PrintDstReg(get_alu_scalar_dest(*alu), alu->scalar_write_mask, alu->export_data); append(" = "); if (is.num_srcs == 2) { // ADD_CONST_0 dest, [const], [reg] @@ -1206,22 +1222,22 @@ int D3D11ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; uint32_t swiz_b = (src3_swiz & 0x3); PrintSrcReg(alu->src3_reg, 0, 0, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants); append(".%c", chan_names[swiz_a]); append(", "); uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); PrintSrcReg(reg2, 1, 0, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants); append(".%c", chan_names[swiz_b]); } else { PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants); } if (alu->scalar_clamp) { append(" CLAMP"); } if (alu->export_data) { - PrintExportComment(alu->scalar_dest); + PrintExportComment(get_alu_scalar_dest(*alu)); } append("\n"); @@ -1616,19 +1632,10 @@ int D3D11ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, append("%c", chan_names[src_swiz & 0x3]); src_swiz >>= 2; } - append(")."); - - // Pass one over dest does xyzw and fakes the special values. - // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc - uint32_t dst_swiz = tex->dst_swiz; - for (int i = 0; i < 4; i++) { - append("%c", chan_names[dst_swiz & 0x3]); - dst_swiz >>= 3; - } - append(";\n"); + append(");\n"); append(" r%u.xyzw = float4(", tex->dst_reg); - dst_swiz = tex->dst_swiz; + uint32_t dst_swiz = tex->dst_swiz; for (int i = 0; i < 4; i++) { if (i) { append(", "); diff --git a/src/xenia/gpu/xenos/ucode.h b/src/xenia/gpu/xenos/ucode.h index eee9d58d0..1e656d1ca 100644 --- a/src/xenia/gpu/xenos/ucode.h +++ b/src/xenia/gpu/xenos/ucode.h @@ -215,7 +215,7 @@ XEPACKEDSTRUCT(instr_alu_t, { XEPACKEDSTRUCTANONYMOUS({ uint32_t vector_dest : 6; uint32_t vector_dest_rel : 1; - uint32_t low_precision_16b_fp : 1; + uint32_t abs_constants : 1; uint32_t scalar_dest : 6; uint32_t scalar_dest_rel : 1; uint32_t export_data : 1; @@ -240,15 +240,9 @@ XEPACKEDSTRUCT(instr_alu_t, { }); /* dword2: */ XEPACKEDSTRUCTANONYMOUS({ - uint32_t src3_reg : 6; - uint32_t src3_reg_select : 1; - uint32_t src3_reg_abs : 1; - uint32_t src2_reg : 6; - uint32_t src2_reg_select : 1; - uint32_t src2_reg_abs : 1; - uint32_t src1_reg : 6; - uint32_t src1_reg_select : 1; - uint32_t src1_reg_abs : 1; + uint32_t src3_reg : 8; + uint32_t src2_reg : 8; + uint32_t src1_reg : 8; uint32_t vector_opc : 5; // instr_vector_opc_t uint32_t src3_sel : 1; uint32_t src2_sel : 1; @@ -256,6 +250,10 @@ XEPACKEDSTRUCT(instr_alu_t, { }); }); +inline uint32_t get_alu_scalar_dest(const instr_alu_t& alu) { + return alu.vector_write_mask ? alu.scalar_dest : alu.vector_dest; +} + /* * CF instructions: */ diff --git a/src/xenia/gpu/xenos/ucode_disassembler.cc b/src/xenia/gpu/xenos/ucode_disassembler.cc index 92f7ad480..01bd23093 100644 --- a/src/xenia/gpu/xenos/ucode_disassembler.cc +++ b/src/xenia/gpu/xenos/ucode_disassembler.cc @@ -97,14 +97,29 @@ static const char chan_names[] = { void print_srcreg( Output* output, uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, uint32_t abs) { + uint32_t swiz, uint32_t negate, uint32_t abs_constants, + XE_GPU_SHADER_TYPE shader_type) { if (negate) { output->append("-"); } - if (abs) { - output->append("|"); + if (type) { + if (num & 0x80) { + output->append("abs("); + } + output->append("R%u", num & 0x7F); + if (num & 0x80) { + output->append(")"); + } + } else { + if (abs_constants) { + output->append("|"); + } + num += shader_type == XE_GPU_SHADER_TYPE_PIXEL ? 256 : 0; + output->append("C%u", num); + if (abs_constants) { + output->append("|"); + } } - output->append("%c%u", type ? 'R' : 'C', num); if (swiz) { output->append("."); for (int i = 0; i < 4; i++) { @@ -112,9 +127,6 @@ void print_srcreg( swiz >>= 2; } } - if (abs) { - output->append("|"); - } } void print_dstreg( @@ -275,17 +287,17 @@ int disasm_alu( if (vector_instructions[alu->vector_opc].num_srcs == 3) { print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants, type); output->append(", "); } print_srcreg(output, alu->src1_reg, alu->src1_sel, alu->src1_swiz, - alu->src1_reg_negate, alu->src1_reg_abs); + alu->src1_reg_negate, alu->abs_constants, type); if (vector_instructions[alu->vector_opc].num_srcs > 1) { output->append(", "); print_srcreg(output, alu->src2_reg, alu->src2_sel, alu->src2_swiz, - alu->src2_reg_negate, alu->src2_reg_abs); + alu->src2_reg_negate, alu->abs_constants, type); } if (alu->vector_clamp) { @@ -314,7 +326,7 @@ int disasm_alu( } print_dstreg(output, - alu->scalar_dest, alu->scalar_write_mask, alu->export_data); + get_alu_scalar_dest(*alu), alu->scalar_write_mask, alu->export_data); output->append(" = "); if (scalar_instructions[alu->scalar_opc].num_srcs == 2) { // MUL/ADD/etc @@ -325,24 +337,24 @@ int disasm_alu( uint32_t swiz_b = (src3_swiz & 0x3); print_srcreg(output, alu->src3_reg, 0, 0, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants, type); output->append(".%c", chan_names[swiz_a]); output->append(", "); uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); print_srcreg(output, reg2, 1, 0, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants, type); output->append(".%c", chan_names[swiz_b]); } else { print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); + alu->src3_reg_negate, alu->abs_constants, type); } if (alu->scalar_clamp) { output->append(" CLAMP"); } if (alu->export_data) { - print_export_comment(output, alu->scalar_dest, type); + print_export_comment(output, get_alu_scalar_dest(*alu), type); } output->append("\n"); }