Merge pull request #273 from DarioSamo/master
GL4 Translator: ADDs/MULs/SUBs XZ instead of XY. Loops on FLOW_CONTROL.
This commit is contained in:
commit
e8fceb0d19
|
@ -105,6 +105,17 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
|
|||
for (uint32_t n = 0; n <= temp_regs; n++) {
|
||||
Append(" vec4 r%d = state.float_consts[%d];\n", n, n);
|
||||
}
|
||||
|
||||
#if FLOW_CONTROL
|
||||
// Add temporary integer registers for loops that we may use.
|
||||
// Each loop uses an address, counter, and constant
|
||||
// TODO: Implement only for the used loops in the shader
|
||||
for (uint32_t n = 0; n < 32; n++) {
|
||||
Append(" int i%d_cnt = 0;\n", n);
|
||||
Append(" int i%d_addr = 0;\n", n);
|
||||
}
|
||||
#endif
|
||||
|
||||
Append(" vec4 t;\n");
|
||||
Append(" vec4 pv;\n"); // Previous Vector result.
|
||||
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
|
||||
|
@ -197,8 +208,14 @@ void GL4ShaderTranslator::AppendSrcReg(const instr_alu_t& op, uint32_t num,
|
|||
Append("abs(");
|
||||
}
|
||||
Append("state.float_consts[");
|
||||
#if FLOW_CONTROL
|
||||
// NOTE(dariosamo): Some games don't seem to take into account the relative a0
|
||||
// offset even when they should due to const_slot being a different value.
|
||||
if (op.const_0_rel_abs || op.const_1_rel_abs) {
|
||||
#else
|
||||
if ((const_slot == 0 && op.const_0_rel_abs) ||
|
||||
(const_slot == 1 && op.const_1_rel_abs)) {
|
||||
#endif
|
||||
if (op.relative_addr) {
|
||||
assert_true(num < 256);
|
||||
Append("a0 + %u", is_pixel_shader() ? num + 256 : num);
|
||||
|
@ -783,7 +800,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
|
|||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".x + ");
|
||||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".y");
|
||||
Append(".z");
|
||||
EndAppendScalarOp(alu);
|
||||
return true;
|
||||
}
|
||||
|
@ -801,7 +818,7 @@ bool GL4ShaderTranslator::TranslateALU_MULs(const instr_alu_t& alu) {
|
|||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".x * ");
|
||||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".y");
|
||||
Append(".z");
|
||||
EndAppendScalarOp(alu);
|
||||
return true;
|
||||
}
|
||||
|
@ -1027,7 +1044,7 @@ bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) {
|
|||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".x - ");
|
||||
AppendScalarOpSrcReg(alu, 3);
|
||||
Append(".y");
|
||||
Append(".z");
|
||||
EndAppendScalarOp(alu);
|
||||
return true;
|
||||
}
|
||||
|
@ -1457,6 +1474,12 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
|
|||
} else if (cfa.opc == COND_JMP) {
|
||||
TranslateJmp(cfa.jmp_call);
|
||||
}
|
||||
#if FLOW_CONTROL
|
||||
else if (cfa.opc == LOOP_START) {
|
||||
TranslateLoopStart(cfa.loop);
|
||||
}
|
||||
#endif // FLOW_CONTROL
|
||||
|
||||
if (cfb.opc == ALLOC) {
|
||||
// ?
|
||||
} else if (cfb.is_exec()) {
|
||||
|
@ -1471,6 +1494,12 @@ bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) {
|
|||
} else if (cfb.opc == COND_JMP) {
|
||||
TranslateJmp(cfb.jmp_call);
|
||||
}
|
||||
#if FLOW_CONTROL
|
||||
else if (cfb.opc == LOOP_END) {
|
||||
TranslateLoopEnd(cfb.loop);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
|
||||
break;
|
||||
}
|
||||
|
@ -1654,6 +1683,26 @@ bool GL4ShaderTranslator::TranslateJmp(const ucode::instr_cf_jmp_call_t& cf) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GL4ShaderTranslator::TranslateLoopStart(const ucode::instr_cf_loop_t& cf) {
|
||||
Append(" // %s", cf_instructions[cf.opc].name);
|
||||
Append(" ADDR(0x%x) LOOP ID(%d)", cf.address, cf.loop_id);
|
||||
if (cf.address_mode == ABSOLUTE_ADDR) {
|
||||
Append(" ABSOLUTE_ADDR");
|
||||
}
|
||||
Append("\n");
|
||||
Append(" i%d_addr = pc;\n", cf.loop_id);
|
||||
Append(" i%d_cnt = 0;\n", cf.loop_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GL4ShaderTranslator::TranslateLoopEnd(const ucode::instr_cf_loop_t& cf) {
|
||||
Append(" // %s", cf_instructions[cf.opc].name);
|
||||
Append(" ADDR(0x%x) LOOP ID(%d)\n", cf.address, cf.loop_id);
|
||||
Append(" i%d_cnt = i%d_cnt + 1;\n", cf.loop_id, cf.loop_id);
|
||||
Append(" pc = (i%d_cnt < state.loop_consts[%d]) ? i%d_addr : pc;\n", cf.loop_id, cf.loop_id, cf.loop_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
|
||||
int sync) {
|
||||
static const struct {
|
||||
|
|
|
@ -156,6 +156,8 @@ class GL4ShaderTranslator {
|
|||
bool TranslateBlocks(GL4Shader* shader);
|
||||
bool TranslateExec(const ucode::instr_cf_exec_t& cf);
|
||||
bool TranslateJmp(const ucode::instr_cf_jmp_call_t& cf);
|
||||
bool TranslateLoopStart(const ucode::instr_cf_loop_t& cf);
|
||||
bool TranslateLoopEnd(const ucode::instr_cf_loop_t& cf);
|
||||
bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync);
|
||||
bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue