exp/sqrt shader ops, retain_prev hack.

This commit is contained in:
Ben Vanik 2015-01-01 16:47:19 -08:00
parent 4a11d5e6e2
commit 2241795713
3 changed files with 75 additions and 25 deletions

View File

@ -100,7 +100,8 @@ bool GL4Shader::PrepareVertexShader(
" state.viewport_offset.z;\n" " state.viewport_offset.z;\n"
" // NDC->Window with viewport.\n" " // NDC->Window with viewport.\n"
" pos.xy = pos.xy * state.window_offset.zw + state.window_offset.xy;\n" " pos.xy = pos.xy * state.window_offset.zw + state.window_offset.xy;\n"
" pos.xy = pos.xy / (vec2(1280.0 - 1.0, -720.0 + 1.0) / 2.0) + vec2(-1.0, 1.0);\n" " pos.xy = pos.xy / (vec2(1280.0 - 1.0, -720.0 + 1.0) / 2.0) +\n"
" vec2(-1.0, 1.0);\n"
" // Window adjustment.\n" " // Window adjustment.\n"
" return pos;\n" " return pos;\n"
"}\n"; "}\n";
@ -147,14 +148,13 @@ bool GL4Shader::PreparePixelShader(
} }
has_prepared_ = true; has_prepared_ = true;
std::string source = std::string source = header +
header + "layout(location = 0) in VertexData vtx;\n"
"layout(location = 0) in VertexData vtx;\n" "layout(location = 0) out vec4 oC[4];\n"
"layout(location = 0) out vec4 oC[4];\n" "void processFragment();\n"
"void processFragment();\n" "void main() {\n" +
"void main() {\n" + " processFragment();\n"
" processFragment();\n" "}\n";
"}\n";
std::string translated_source = std::string translated_source =
shader_translator_.TranslatePixelShader(this, program_cntl); shader_translator_.TranslatePixelShader(this, program_cntl);

View File

@ -255,11 +255,12 @@ void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
// TODO(benvanik): mask out values? mix in old value as temp? // TODO(benvanik): mask out values? mix in old value as temp?
// Append("%c", (mask & 0x1) ? chan_names[i] : 'w'); // Append("%c", (mask & 0x1) ? chan_names[i] : 'w');
if (!(mask & 0x1)) { if (!(mask & 0x1)) {
// Don't write - use existing value.
AppendDestRegName(num, dst_exp); AppendDestRegName(num, dst_exp);
Append(".%c", chan_names[i]);
} else { } else {
Append("t"); Append("t.%c", chan_names[i]);
} }
Append(".%c", chan_names[i]);
mask >>= 1; mask >>= 1;
if (i < 3) { if (i < 3) {
Append(", "); Append(", ");
@ -817,6 +818,26 @@ bool GL4ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) {
return TranslateALU_SETXXs(alu, "!="); return TranslateALU_SETXXs(alu, "!=");
} }
// Translates the scalar EXP_IEEE ALU instruction into a GLSL assignment.
//
// Emits `<dest> = exp2(<src3>);`, wrapping the value in `clamp(..., 0.0, 1.0)`
// when alu.scalar_clamp is set, then lets AppendDestRegPost finish the write
// for the scalar destination using alu.scalar_write_mask.
//
// The hardware EXP_IEEE scalar op is a base-2 exponent (2^x), so the GLSL
// builtin used here is exp2(); GLSL exp() is the natural exponent (e^x) and
// would produce different results.
//
// Always returns true (the instruction is considered handled).
bool GL4ShaderTranslator::TranslateALU_EXP_IEEE(const instr_alu_t& alu) {
  AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask,
                alu.export_data);
  Append(" = ");
  if (alu.scalar_clamp) {
    // Saturate: opened here, closed after the operand below.
    Append("clamp(");
  }
  // Base-2 exponent of the scalar operand (scalar ops read src3).
  Append("exp2(");
  AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
               alu.abs_constants);
  Append(")");
  if (alu.scalar_clamp) {
    Append(", 0.0, 1.0)");
  }
  Append(";\n");
  AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
                    alu.export_data);
  return true;
}
bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask,
alu.export_data); alu.export_data);
@ -837,6 +858,26 @@ bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
return true; return true;
} }
// Translates the scalar SQRT_IEEE ALU instruction into a GLSL assignment.
//
// Emits `<dest> = sqrt(<src3>);`, or `<dest> = clamp(sqrt(<src3>), 0.0, 1.0);`
// when alu.scalar_clamp is set, followed by the AppendDestRegPost step for
// the scalar destination and alu.scalar_write_mask.
//
// Always returns true (the instruction is considered handled).
bool GL4ShaderTranslator::TranslateALU_SQRT_IEEE(const instr_alu_t& alu) {
  const auto dest_reg = get_alu_scalar_dest(alu);
  const bool saturate = alu.scalar_clamp ? true : false;

  // Left-hand side and the opening of the expression.
  AppendDestReg(dest_reg, alu.scalar_write_mask, alu.export_data);
  Append(" = ");
  Append(saturate ? "clamp(sqrt(" : "sqrt(");

  // Scalar operand (scalar ops read src3).
  AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
               alu.abs_constants);

  // Close sqrt(), add the clamp bounds if saturating, and end the statement.
  Append(")");
  Append(saturate ? ", 0.0, 1.0);\n" : ";\n");

  AppendDestRegPost(dest_reg, alu.scalar_write_mask, alu.export_data);
  return true;
}
bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask,
alu.export_data); alu.export_data);
@ -928,8 +969,15 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) {
} }
bool GL4ShaderTranslator::TranslateALU_RETAIN_PREV(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_RETAIN_PREV(const instr_alu_t& alu) {
// TODO(benvanik): pull out prev value in s. // TODO(benvanik): figure out how this is used.
return false; // It seems like vector writes to export regs will use this to write 1's to
// components (like w in position).
assert_true(alu.export_data == 1);
AppendDestReg(alu.vector_dest, alu.scalar_write_mask, alu.export_data);
Append(" = ");
Append("vec4(1.0, 1.0, 1.0, 1.0);\n");
AppendDestRegPost(alu.vector_dest, alu.scalar_write_mask, alu.export_data);
return true;
} }
typedef bool (GL4ShaderTranslator::*TranslateFn)(const instr_alu_t& alu); typedef bool (GL4ShaderTranslator::*TranslateFn)(const instr_alu_t& alu);
@ -991,7 +1039,7 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR(FRACs, 1), // 11 ALU_INSTR(FRACs, 1), // 11
ALU_INSTR(TRUNCs, 1), // 12 ALU_INSTR(TRUNCs, 1), // 12
ALU_INSTR(FLOORs, 1), // 13 ALU_INSTR(FLOORs, 1), // 13
ALU_INSTR(EXP_IEEE, 1), // 14 ALU_INSTR_IMPL(EXP_IEEE, 1), // 14
ALU_INSTR(LOG_CLAMP, 1), // 15 ALU_INSTR(LOG_CLAMP, 1), // 15
ALU_INSTR(LOG_IEEE, 1), // 16 ALU_INSTR(LOG_IEEE, 1), // 16
ALU_INSTR(RECIP_CLAMP, 1), // 17 ALU_INSTR(RECIP_CLAMP, 1), // 17
@ -1017,17 +1065,17 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR(KILLGTEs, 1), // 37 ALU_INSTR(KILLGTEs, 1), // 37
ALU_INSTR(KILLNEs, 1), // 38 ALU_INSTR(KILLNEs, 1), // 38
ALU_INSTR(KILLONEs, 1), // 39 ALU_INSTR(KILLONEs, 1), // 39
ALU_INSTR(SQRT_IEEE, 1), // 40 ALU_INSTR_IMPL(SQRT_IEEE, 1), // 40
{0, 0, false}, {0, 0, false}, //
ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42
ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43
ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44
ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45
ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46
ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47
ALU_INSTR(SIN, 1), // 48 ALU_INSTR(SIN, 1), // 48
ALU_INSTR(COS, 1), // 49 ALU_INSTR(COS, 1), // 49
ALU_INSTR(RETAIN_PREV, 1), // 50 ALU_INSTR_IMPL(RETAIN_PREV, 1), // 50
}; };
#undef ALU_INSTR #undef ALU_INSTR
#undef ALU_INSTR_IMPL #undef ALU_INSTR_IMPL

View File

@ -96,7 +96,9 @@ class GL4ShaderTranslator {
bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu); bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu);
bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu); bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu);
bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu); bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu);
bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_SQRT_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu); bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu);
bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu); bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu);
bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu); bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu);