GL4 Translator: ADDs/MULs/SUBs changed to XZ instead of XY. Basic implementation of loops for FLOW_CONTROL.

This commit is contained in:
dariosamo 2015-06-21 14:09:41 -03:00
parent 2c319db116
commit e5727e7e2a
2 changed files with 54 additions and 3 deletions

View File

@ -105,6 +105,17 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
for (uint32_t n = 0; n <= temp_regs; n++) {
Append(" vec4 r%d = state.float_consts[%d];\n", n, n);
}
#if FLOW_CONTROL
// Add temporary integer registers for loops that we may use.
// Each loop uses an address, counter, and constant
// TODO: Implement only for the used loops in the shader
for (uint32_t n = 0; n < 32; n++) {
Append(" int i%d_cnt = 0;\n", n);
Append(" int i%d_addr = 0;\n", n);
}
#endif
Append(" vec4 t;\n");
Append(" vec4 pv;\n"); // Previous Vector result.
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
@ -197,8 +208,14 @@ void GL4ShaderTranslator::AppendSrcReg(const instr_alu_t& op, uint32_t num,
Append("abs(");
}
Append("state.float_consts[");
#if FLOW_CONTROL
// NOTE(dariosamo): Some games don't seem to take into account the relative a0
// offset even when they should due to const_slot being a different value.
if (op.const_0_rel_abs || op.const_1_rel_abs) {
#else
if ((const_slot == 0 && op.const_0_rel_abs) ||
(const_slot == 1 && op.const_1_rel_abs)) {
#endif
if (op.relative_addr) {
assert_true(num < 256);
Append("a0 + %u", is_pixel_shader() ? num + 256 : num);
@ -783,7 +800,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
AppendScalarOpSrcReg(alu, 3);
Append(".x + ");
AppendScalarOpSrcReg(alu, 3);
Append(".y");
Append(".z");
EndAppendScalarOp(alu);
return true;
}
@ -801,7 +818,7 @@ bool GL4ShaderTranslator::TranslateALU_MULs(const instr_alu_t& alu) {
AppendScalarOpSrcReg(alu, 3);
Append(".x * ");
AppendScalarOpSrcReg(alu, 3);
Append(".y");
Append(".z");
EndAppendScalarOp(alu);
return true;
}
@ -1027,7 +1044,7 @@ bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) {
AppendScalarOpSrcReg(alu, 3);
Append(".x - ");
AppendScalarOpSrcReg(alu, 3);
Append(".y");
Append(".z");
EndAppendScalarOp(alu);
return true;
}
@ -1457,6 +1474,12 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
} else if (cfa.opc == COND_JMP) {
TranslateJmp(cfa.jmp_call);
}
#if FLOW_CONTROL
else if (cfa.opc == LOOP_START) {
TranslateLoopStart(cfa.loop);
}
#endif // FLOW_CONTROL
if (cfb.opc == ALLOC) {
// ?
} else if (cfb.is_exec()) {
@ -1471,6 +1494,12 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
} else if (cfb.opc == COND_JMP) {
TranslateJmp(cfb.jmp_call);
}
#if FLOW_CONTROL
else if (cfb.opc == LOOP_END) {
TranslateLoopEnd(cfb.loop);
}
#endif
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
break;
}
@ -1654,6 +1683,26 @@ bool GL4ShaderTranslator::TranslateJmp(const ucode::instr_cf_jmp_call_t& cf) {
return true;
}
// Emits the translated output for a loop-start control-flow instruction.
//
// First appends a disassembly-style comment line holding the instruction
// name, its address, its loop id and (when the instruction uses absolute
// addressing) an ABSOLUTE_ADDR marker. Then appends two statements that
// store the current `pc` into the loop's `i<id>_addr` variable and reset the
// loop's `i<id>_cnt` variable to zero.
//
// Always returns true.
bool GL4ShaderTranslator::TranslateLoopStart(const ucode::instr_cf_loop_t& cf) {
  const int id = static_cast<int>(cf.loop_id);
  const bool absolute = (cf.address_mode == ABSOLUTE_ADDR);

  // Comment line describing the instruction being translated.
  Append(" // %s", cf_instructions[cf.opc].name);
  Append(" ADDR(0x%x) LOOP ID(%d)", cf.address, id);
  if (absolute) {
    Append(" ABSOLUTE_ADDR");
  }
  Append("\n");

  // Remember where the loop starts and restart its iteration counter.
  Append(" i%d_addr = pc;\n", id);
  Append(" i%d_cnt = 0;\n", id);
  return true;
}
// Emits the translated output for a loop-end control-flow instruction.
//
// Appends a disassembly-style comment line (instruction name, address and
// loop id), then a statement incrementing the loop's `i<id>_cnt` variable,
// and finally a statement that sets `pc` back to the loop's stored
// `i<id>_addr` while the counter is still below `state.loop_consts[<id>]`;
// otherwise `pc` is left unchanged.
//
// Always returns true.
bool GL4ShaderTranslator::TranslateLoopEnd(const ucode::instr_cf_loop_t& cf) {
  const int id = static_cast<int>(cf.loop_id);

  // Comment line describing the instruction being translated.
  Append(" // %s", cf_instructions[cf.opc].name);
  Append(" ADDR(0x%x) LOOP ID(%d)\n", cf.address, id);

  // Advance the iteration counter for this loop.
  Append(" i%d_cnt = i%d_cnt + 1;\n", id, id);

  // Jump back to the recorded loop start until the counter reaches the
  // loop constant.
  Append(" pc = (i%d_cnt < state.loop_consts[%d]) ? "
         "i%d_addr : pc;\n",
         id, id, id);
  return true;
}
bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
int sync) {
static const struct {

View File

@ -156,6 +156,8 @@ class GL4ShaderTranslator {
bool TranslateBlocks(GL4Shader* shader);
bool TranslateExec(const ucode::instr_cf_exec_t& cf);
bool TranslateJmp(const ucode::instr_cf_jmp_call_t& cf);
bool TranslateLoopStart(const ucode::instr_cf_loop_t& cf);
bool TranslateLoopEnd(const ucode::instr_cf_loop_t& cf);
bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync);
bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync);
};