From 9441fb8b7a85405003b5d4ea8e152f0527f2f292 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 16 Nov 2013 18:27:17 -0800 Subject: [PATCH] Finally figured out MUL/ADD/SUB_CONST_* --- src/xenia/gpu/d3d11/d3d11_shader.cc | 131 +++++++++++++++++++--- src/xenia/gpu/xenos/ucode_disassembler.cc | 36 ++++-- 2 files changed, 145 insertions(+), 22 deletions(-) diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc index 215306232..96624d984 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader.cc @@ -1002,7 +1002,7 @@ int TranslateALU_MAXs( xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); ctx.output->append(" = "); - if (alu.vector_clamp) { + if (alu.scalar_clamp) { ctx.output->append("saturate("); } if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { @@ -1015,7 +1015,7 @@ int TranslateALU_MAXs( AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); ctx.output->append(".y).xxxx"); } - if (alu.vector_clamp) { + if (alu.scalar_clamp) { ctx.output->append(")"); } ctx.output->append(";\n"); @@ -1027,7 +1027,7 @@ int TranslateALU_MINs( xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); ctx.output->append(" = "); - if (alu.vector_clamp) { + if (alu.scalar_clamp) { ctx.output->append("saturate("); } ctx.output->append("min("); @@ -1035,7 +1035,7 @@ int TranslateALU_MINs( ctx.output->append(".x, "); AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); ctx.output->append(".y).xxxx"); - if (alu.vector_clamp) { + if (alu.scalar_clamp) { ctx.output->append(")"); } ctx.output->append(";\n"); @@ -1043,6 +1043,93 @@ int TranslateALU_MINs( return 0; } +int TranslateALU_MUL_CONST_0( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + ctx.output->append(" = "); + if (alu.scalar_clamp) { + ctx.output->append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + ctx.output->append("("); + AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c * ", chan_names[swiz_a]); + AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c", chan_names[swiz_b]); + ctx.output->append(").xxxx"); + if (alu.scalar_clamp) { + ctx.output->append(")"); + } + ctx.output->append(";\n"); + AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int TranslateALU_MUL_CONST_1( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + return TranslateALU_MUL_CONST_0(ctx, alu); +} + +int TranslateALU_ADD_CONST_0( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + ctx.output->append(" = "); + if (alu.scalar_clamp) { + ctx.output->append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + ctx.output->append("("); + AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c + ", chan_names[swiz_a]); + AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c", chan_names[swiz_b]); + ctx.output->append(").xxxx"); + if (alu.scalar_clamp) { + ctx.output->append(")"); + } + ctx.output->append(";\n"); + AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int TranslateALU_ADD_CONST_1( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + return TranslateALU_ADD_CONST_0(ctx, alu); +} + +int TranslateALU_SUB_CONST_0( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + ctx.output->append(" = "); + if (alu.scalar_clamp) { + ctx.output->append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + ctx.output->append("("); + AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c - ", chan_names[swiz_a]); + AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + ctx.output->append(".%c", chan_names[swiz_b]); + ctx.output->append(").xxxx"); + if (alu.scalar_clamp) { + ctx.output->append(")"); + } + ctx.output->append(";\n"); + AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int TranslateALU_SUB_CONST_1( + xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { + return TranslateALU_SUB_CONST_0(ctx, alu); +} + typedef int (*xe_gpu_translate_alu_fn)( xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu); typedef struct { @@ -1129,12 +1216,12 @@ static xe_gpu_translate_alu_info_t scalar_alu_instrs[0x40] = { ALU_INSTR(KILLONEs, 1), // 39 ALU_INSTR(SQRT_IEEE, 1), // 40 { 0, 0, false }, - ALU_INSTR(MUL_CONST_0, 2), // 42 - ALU_INSTR(MUL_CONST_1, 2), // 43 - ALU_INSTR(ADD_CONST_0, 2), // 44 - ALU_INSTR(ADD_CONST_1, 2), // 45 - ALU_INSTR(SUB_CONST_0, 2), // 46 - ALU_INSTR(SUB_CONST_1, 2), // 47 + ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 + ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 + ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 + ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 + ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 + ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 ALU_INSTR(SIN, 1), // 48 ALU_INSTR(COS, 1), // 49 ALU_INSTR(RETAIN_PREV, 1), // 50 @@ -1213,10 +1300,26 @@ int TranslateALU( print_dstreg(output, alu->scalar_dest, alu->scalar_write_mask, alu->export_data); output->append(" = "); - print_srcreg(output, - alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); - // TODO ADD/MUL must have another src?!? + if (is.num_srcs == 2) { + // ADD_CONST_0 dest, [const], [reg] + uint32_t src3_swiz = alu->src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + print_srcreg(output, + alu->src3_reg, 0, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + output->append(".%c", chan_names[swiz_a]); + output->append(", "); + uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); + print_srcreg(output, + reg2, 1, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + output->append(".%c", chan_names[swiz_b]); + } else { + print_srcreg(output, + alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + } if (alu->scalar_clamp) { output->append(" CLAMP"); } diff --git a/src/xenia/gpu/xenos/ucode_disassembler.cc b/src/xenia/gpu/xenos/ucode_disassembler.cc index 90a6ac2ce..6a062f817 100644 --- a/src/xenia/gpu/xenos/ucode_disassembler.cc +++ b/src/xenia/gpu/xenos/ucode_disassembler.cc @@ -303,22 +303,42 @@ int disasm_alu( if (alu->scalar_write_mask || !alu->vector_write_mask) { // 2nd optional scalar op: - output->append("%s", levels[level]); - output->append(" \t\t\t\t\t"); + if (alu->vector_write_mask) { + output->append("%s", levels[level]); + output->append(" \t\t\t\t\t\t \t"); + } if (scalar_instructions[alu->scalar_opc].name) { - output->append("\t \t%s\t", scalar_instructions[alu->scalar_opc].name); + output->append("%s\t", scalar_instructions[alu->scalar_opc].name); } else { - output->append("\t \tOP(%u)\t", alu->scalar_opc); + output->append("OP(%u)\t", alu->scalar_opc); } print_dstreg(output, alu->scalar_dest, alu->scalar_write_mask, alu->export_data); output->append(" = "); - print_srcreg(output, - alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); - // TODO ADD/MUL must have another src?!? + if (scalar_instructions[alu->scalar_opc].num_srcs == 2) { + // MUL/ADD/etc + // Clever, CONST_0 and CONST_1 are just an extra storage bit. + // ADD_CONST_0 dest, [const], [reg] + uint32_t src3_swiz = alu->src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + print_srcreg(output, + alu->src3_reg, 0, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + output->append(".%c", chan_names[swiz_a]); + output->append(", "); + uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); + print_srcreg(output, + reg2, 1, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + output->append(".%c", chan_names[swiz_b]); + } else { + print_srcreg(output, + alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + } if (alu->scalar_clamp) { output->append(" CLAMP"); }