From e5727e7e2a5e1ae96c9cf9e72519111eb3e67243 Mon Sep 17 00:00:00 2001
From: dariosamo
Date: Sun, 21 Jun 2015 14:09:41 -0300
Subject: [PATCH] GL4 Translator: ADDs/MULs/SUBs changed to XZ instead of XY.
 Basic implementation of loops for FLOW_CONTROL.

---
 src/xenia/gpu/gl4/gl4_shader_translator.cc | 71 ++++++++++++++++++++--
 src/xenia/gpu/gl4/gl4_shader_translator.h  |  2 +
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc
index 91ee56766..1cbd15c9e 100644
--- a/src/xenia/gpu/gl4/gl4_shader_translator.cc
+++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc
@@ -105,6 +105,17 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
   for (uint32_t n = 0; n <= temp_regs; n++) {
     Append(" vec4 r%d = state.float_consts[%d];\n", n, n);
   }
+
+#if FLOW_CONTROL
+  // Add temporary integer registers for loops that we may use.
+  // Each loop gets a counter (iN_cnt) and a saved address (iN_addr).
+  // TODO: Implement only for the used loops in the shader
+  for (uint32_t n = 0; n < 32; n++) {
+    Append(" int i%d_cnt = 0;\n", n);
+    Append(" int i%d_addr = 0;\n", n);
+  }
+#endif
+
   Append(" vec4 t;\n");
   Append(" vec4 pv;\n"); // Previous Vector result.
   Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
@@ -197,8 +208,14 @@ void GL4ShaderTranslator::AppendSrcReg(const instr_alu_t& op, uint32_t num,
       Append("abs(");
     }
     Append("state.float_consts[");
+#if FLOW_CONTROL
+    // NOTE(dariosamo): Some games don't seem to take into account the relative a0
+    // offset even when they should due to const_slot being a different value.
+    if (op.const_0_rel_abs || op.const_1_rel_abs) {
+#else
     if ((const_slot == 0 && op.const_0_rel_abs) ||
         (const_slot == 1 && op.const_1_rel_abs)) {
+#endif
       if (op.relative_addr) {
         assert_true(num < 256);
         Append("a0 + %u", is_pixel_shader() ? num + 256 : num);
@@ -783,7 +800,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
   AppendScalarOpSrcReg(alu, 3);
   Append(".x + ");
   AppendScalarOpSrcReg(alu, 3);
-  Append(".y");
+  Append(".z");
   EndAppendScalarOp(alu);
   return true;
 }
@@ -801,7 +818,7 @@ bool GL4ShaderTranslator::TranslateALU_MULs(const instr_alu_t& alu) {
   AppendScalarOpSrcReg(alu, 3);
   Append(".x * ");
   AppendScalarOpSrcReg(alu, 3);
-  Append(".y");
+  Append(".z");
   EndAppendScalarOp(alu);
   return true;
 }
@@ -1027,7 +1044,7 @@ bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) {
   AppendScalarOpSrcReg(alu, 3);
   Append(".x - ");
   AppendScalarOpSrcReg(alu, 3);
-  Append(".y");
+  Append(".z");
   EndAppendScalarOp(alu);
   return true;
 }
@@ -1457,6 +1474,15 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
     } else if (cfa.opc == COND_JMP) {
       TranslateJmp(cfa.jmp_call);
     }
+#if FLOW_CONTROL
+    // Handle both loop opcodes in this slot, mirroring the other slot.
+    else if (cfa.opc == LOOP_START) {
+      TranslateLoopStart(cfa.loop);
+    } else if (cfa.opc == LOOP_END) {
+      TranslateLoopEnd(cfa.loop);
+    }
+#endif  // FLOW_CONTROL
+
     if (cfb.opc == ALLOC) {
       // ?
     } else if (cfb.is_exec()) {
@@ -1471,6 +1497,15 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
     } else if (cfb.opc == COND_JMP) {
       TranslateJmp(cfb.jmp_call);
     }
+#if FLOW_CONTROL
+    // Handle both loop opcodes in this slot, mirroring the other slot.
+    else if (cfb.opc == LOOP_START) {
+      TranslateLoopStart(cfb.loop);
+    } else if (cfb.opc == LOOP_END) {
+      TranslateLoopEnd(cfb.loop);
+    }
+#endif  // FLOW_CONTROL
+
     if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
       break;
     }
@@ -1654,6 +1689,36 @@ bool GL4ShaderTranslator::TranslateJmp(const ucode::instr_cf_jmp_call_t& cf) {
   return true;
 }
 
+// Emits the GLSL for a LOOP_START control-flow instruction: a comment line
+// describing the instruction, then code that stores the shader's current pc in
+// the loop's iN_addr register and zeroes its iN_cnt counter.
+bool GL4ShaderTranslator::TranslateLoopStart(const ucode::instr_cf_loop_t& cf) {
+  Append(" // %s", cf_instructions[cf.opc].name);
+  Append(" ADDR(0x%x) LOOP ID(%d)", cf.address, cf.loop_id);
+  if (cf.address_mode == ABSOLUTE_ADDR) {
+    Append(" ABSOLUTE_ADDR");
+  }
+  Append("\n");
+  Append(" i%d_addr = pc;\n", cf.loop_id);
+  Append(" i%d_cnt = 0;\n", cf.loop_id);
+  return true;
+}
+
+// Emits the GLSL for a LOOP_END control-flow instruction: a comment line
+// describing the instruction, then code that increments the loop's iN_cnt
+// counter and sets pc back to iN_addr while the counter is still below
+// state.loop_consts[N].
+// NOTE(review): the raw loop constant is used as the iteration count; confirm
+// it is not a packed value that needs masking first.
+bool GL4ShaderTranslator::TranslateLoopEnd(const ucode::instr_cf_loop_t& cf) {
+  Append(" // %s", cf_instructions[cf.opc].name);
+  Append(" ADDR(0x%x) LOOP ID(%d)\n", cf.address, cf.loop_id);
+  Append(" i%d_cnt = i%d_cnt + 1;\n", cf.loop_id, cf.loop_id);
+  Append(" pc = (i%d_cnt < state.loop_consts[%d]) ? i%d_addr : pc;\n",
+         cf.loop_id, cf.loop_id, cf.loop_id);
+  return true;
+}
+
 bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
                                                int sync) {
   static const struct {
diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h
index 17eedd148..5e0a8d490 100644
--- a/src/xenia/gpu/gl4/gl4_shader_translator.h
+++ b/src/xenia/gpu/gl4/gl4_shader_translator.h
@@ -156,6 +156,8 @@ class GL4ShaderTranslator {
   bool TranslateBlocks(GL4Shader* shader);
   bool TranslateExec(const ucode::instr_cf_exec_t& cf);
   bool TranslateJmp(const ucode::instr_cf_jmp_call_t& cf);
+  bool TranslateLoopStart(const ucode::instr_cf_loop_t& cf);
+  bool TranslateLoopEnd(const ucode::instr_cf_loop_t& cf);
   bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync);
   bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync);
 };