Possibly right const addressing and more shader instructions.

This commit is contained in:
Ben Vanik 2015-03-15 11:20:19 -07:00
parent b07cd49281
commit 795df80687
4 changed files with 144 additions and 39 deletions

View File

@ -46,6 +46,7 @@ std::string GL4Shader::GetHeader() {
"#extension GL_ARB_shader_draw_parameters : require\n" "#extension GL_ARB_shader_draw_parameters : require\n"
"#extension GL_ARB_shader_storage_buffer_object : require\n" "#extension GL_ARB_shader_storage_buffer_object : require\n"
"#extension GL_ARB_shading_language_420pack : require\n" "#extension GL_ARB_shading_language_420pack : require\n"
"#define FLT_MAX 3.402823466e+38\n"
"precision highp float;\n" "precision highp float;\n"
"precision highp int;\n" "precision highp int;\n"
"layout(std140, column_major) uniform;\n" "layout(std140, column_major) uniform;\n"

View File

@ -105,6 +105,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
Append(" vec4 pv;\n"); // Previous Vector result. Append(" vec4 pv;\n"); // Previous Vector result.
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV). Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
Append(" bool p = false;\n"); // Predicate temp, clause-local. Append(" bool p = false;\n"); // Predicate temp, clause-local.
Append(" int a0 = 0;\n"); // Address register.
// Execute blocks. // Execute blocks.
const auto& execs = vertex_shader->execs(); const auto& execs = vertex_shader->execs();
@ -141,6 +142,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
Append(" vec4 pv;\n"); // Previous Vector result. Append(" vec4 pv;\n"); // Previous Vector result.
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV). Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
Append(" bool p = false;\n"); // Predicate temp, clause-local. Append(" bool p = false;\n"); // Predicate temp, clause-local.
Append(" int a0 = 0;\n"); // Address register.
// Bring registers local. // Bring registers local.
for (uint32_t n = 0; n < kMaxInterpolators; n++) { for (uint32_t n = 0; n < kMaxInterpolators; n++) {
@ -161,9 +163,9 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
return output_.to_string(); return output_.to_string();
} }
void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, void GL4ShaderTranslator::AppendSrcReg(const instr_alu_t& op, uint32_t num,
uint32_t swiz, uint32_t negate, uint32_t type, uint32_t swiz,
uint32_t abs_constants) { uint32_t negate) {
if (negate) { if (negate) {
Append("-"); Append("-");
} }
@ -178,11 +180,24 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
} }
} else { } else {
// Constant. // Constant.
if (abs_constants) { if (op.abs_constants) {
Append("abs("); Append("abs(");
} }
Append("state.float_consts[%u]", is_pixel_shader() ? num + 256 : num); Append("state.float_consts[");
if (abs_constants) { assert_true(op.const_1_rel_abs == 0);
if (op.const_0_rel_abs) {
if (op.relative_addr) {
assert_true(num < 256);
Append("a0 + %u", is_pixel_shader() ? num + 256 : num);
} else {
Append("a0");
}
} else {
assert_true(num < 256);
Append("%u", is_pixel_shader() ? num + 256 : num);
}
Append("]");
if (op.abs_constants) {
Append(")"); Append(")");
} }
} }
@ -296,16 +311,16 @@ void GL4ShaderTranslator::AppendVectorOpSrcReg(const ucode::instr_alu_t& op,
int i) { int i) {
switch (i) { switch (i) {
case 1: case 1:
AppendSrcReg(op.src1_reg, op.src1_sel, op.src1_swiz, op.src1_reg_negate, AppendSrcReg(op, op.src1_reg, op.src1_sel, op.src1_swiz,
op.abs_constants); op.src1_reg_negate);
break; break;
case 2: case 2:
AppendSrcReg(op.src2_reg, op.src2_sel, op.src2_swiz, op.src2_reg_negate, AppendSrcReg(op, op.src2_reg, op.src2_sel, op.src2_swiz,
op.abs_constants); op.src2_reg_negate);
break; break;
case 3: case 3:
AppendSrcReg(op.src3_reg, op.src3_sel, op.src3_swiz, op.src3_reg_negate, AppendSrcReg(op, op.src3_reg, op.src3_sel, op.src3_swiz,
op.abs_constants); op.src3_reg_negate);
break; break;
} }
} }
@ -385,16 +400,16 @@ void GL4ShaderTranslator::AppendScalarOpSrcReg(const ucode::instr_alu_t& op,
int i) { int i) {
switch (i) { switch (i) {
case 1: case 1:
AppendSrcReg(op.src1_reg, op.src1_sel, op.src1_swiz, op.src1_reg_negate, AppendSrcReg(op, op.src1_reg, op.src1_sel, op.src1_swiz,
op.abs_constants); op.src1_reg_negate);
break; break;
case 2: case 2:
AppendSrcReg(op.src2_reg, op.src2_sel, op.src2_swiz, op.src2_reg_negate, AppendSrcReg(op, op.src2_reg, op.src2_sel, op.src2_swiz,
op.abs_constants); op.src2_reg_negate);
break; break;
case 3: case 3:
AppendSrcReg(op.src3_reg, op.src3_sel, op.src3_swiz, op.src3_reg_negate, AppendSrcReg(op, op.src3_reg, op.src3_sel, op.src3_swiz,
op.abs_constants); op.src3_reg_negate);
break; break;
} }
} }
@ -731,7 +746,41 @@ bool GL4ShaderTranslator::TranslateALU_PRED_SETGTE_PUSHv(
return TranslateALU_PRED_SETXX_PUSHv(alu, ">="); return TranslateALU_PRED_SETXX_PUSHv(alu, ">=");
} }
// ... bool GL4ShaderTranslator::TranslateALU_DSTv(const ucode::instr_alu_t& alu) {
BeginAppendVectorOp(alu);
Append("vec4(1.0, (");
AppendVectorOpSrcReg(alu, 1);
Append(".y * ");
AppendVectorOpSrcReg(alu, 1);
Append(".y), ");
AppendVectorOpSrcReg(alu, 1);
Append(".z, ");
AppendVectorOpSrcReg(alu, 2);
Append(".w)");
EndAppendVectorOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_MOVAv(const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendVectorOpSrcReg(alu, 1);
Append(".w + 0.5)), -256, 255);\n");
BeginAppendVectorOp(alu);
if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel &&
alu.src1_swiz == alu.src2_swiz &&
alu.src1_reg_negate == alu.src2_reg_negate) {
// This is a mov.
AppendVectorOpSrcReg(alu, 1);
} else {
Append("max(");
AppendVectorOpSrcReg(alu, 1);
Append(", ");
AppendVectorOpSrcReg(alu, 2);
Append(")");
}
EndAppendVectorOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
@ -745,8 +794,8 @@ bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
bool GL4ShaderTranslator::TranslateALU_ADD_PREVs(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_ADD_PREVs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu, alu.src3_reg, alu.src3_sel, alu.src3_swiz,
alu.abs_constants); alu.src3_reg_negate);
Append(".x + ps"); Append(".x + ps");
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
return true; return true;
@ -764,8 +813,8 @@ bool GL4ShaderTranslator::TranslateALU_MULs(const instr_alu_t& alu) {
bool GL4ShaderTranslator::TranslateALU_MUL_PREVs(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_MUL_PREVs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu, alu.src3_reg, alu.src3_sel, alu.src3_swiz,
alu.abs_constants); alu.src3_reg_negate);
Append(".x * ps"); Append(".x * ps");
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
return true; return true;
@ -858,6 +907,17 @@ bool GL4ShaderTranslator::TranslateALU_EXP_IEEE(const instr_alu_t& alu) {
return true; return true;
} }
bool GL4ShaderTranslator::TranslateALU_LOG_CLAMP(
const ucode::instr_alu_t& alu) {
Append(" ps = log2(");
AppendScalarOpSrcReg(alu, 3);
Append(".x);");
BeginAppendScalarOp(alu);
Append("isinf(ps) && ps < 0.0 ? -FLT_MAX : ps");
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
Append("log2("); Append("log2(");
@ -929,7 +989,44 @@ bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE(
return true; return true;
} }
// ... bool GL4ShaderTranslator::TranslateALU_MOVAs(const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendScalarOpSrcReg(alu, 3);
Append(".x + 0.5)), -256, 255);\n");
BeginAppendScalarOp(alu);
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov.
AppendScalarOpSrcReg(alu, 3);
} else {
Append("max(");
AppendScalarOpSrcReg(alu, 3);
Append(".x, ");
AppendScalarOpSrcReg(alu, 3);
Append(".y)");
}
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_MOVA_FLOORs(
const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendScalarOpSrcReg(alu, 3);
Append(".x)), -256, 255);\n");
BeginAppendScalarOp(alu);
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov.
AppendScalarOpSrcReg(alu, 3);
} else {
Append("max(");
AppendScalarOpSrcReg(alu, 3);
Append(".x, ");
AppendScalarOpSrcReg(alu, 3);
Append(".y)");
}
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
@ -1015,9 +1112,9 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3); uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c * ", chan_names[swiz_a]); Append(".%c * ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
return true; return true;
@ -1033,9 +1130,9 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3); uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c + ", chan_names[swiz_a]); Append(".%c + ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
return true; return true;
@ -1051,9 +1148,9 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3); uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c - ", chan_names[swiz_a]); Append(".%c - ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
return true; return true;
@ -1132,8 +1229,8 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR(KILLGTv, 2), // 25 ALU_INSTR(KILLGTv, 2), // 25
ALU_INSTR(KILLGTEv, 2), // 26 ALU_INSTR(KILLGTEv, 2), // 26
ALU_INSTR(KILLNEv, 2), // 27 ALU_INSTR(KILLNEv, 2), // 27
ALU_INSTR(DSTv, 2), // 28 ALU_INSTR_IMPL(DSTv, 2), // 28
ALU_INSTR(MOVAv, 1), // 29 ALU_INSTR_IMPL(MOVAv, 1), // 29
}; };
static TranslateInfo scalar_alu_instrs[0x40] = { static TranslateInfo scalar_alu_instrs[0x40] = {
ALU_INSTR_IMPL(ADDs, 1), // 0 ALU_INSTR_IMPL(ADDs, 1), // 0
@ -1151,7 +1248,7 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(TRUNCs, 1), // 12 ALU_INSTR_IMPL(TRUNCs, 1), // 12
ALU_INSTR_IMPL(FLOORs, 1), // 13 ALU_INSTR_IMPL(FLOORs, 1), // 13
ALU_INSTR_IMPL(EXP_IEEE, 1), // 14 ALU_INSTR_IMPL(EXP_IEEE, 1), // 14
ALU_INSTR(LOG_CLAMP, 1), // 15 ALU_INSTR_IMPL(LOG_CLAMP, 1), // 15
ALU_INSTR_IMPL(LOG_IEEE, 1), // 16 ALU_INSTR_IMPL(LOG_IEEE, 1), // 16
ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17 ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17
ALU_INSTR_IMPL(RECIP_FF, 1), // 18 ALU_INSTR_IMPL(RECIP_FF, 1), // 18
@ -1159,8 +1256,8 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20 ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20
ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21 ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21
ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22 ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22
ALU_INSTR(MOVAs, 1), // 23 ALU_INSTR_IMPL(MOVAs, 1), // 23
ALU_INSTR(MOVA_FLOORs, 1), // 24 ALU_INSTR_IMPL(MOVA_FLOORs, 1), // 24
ALU_INSTR_IMPL(SUBs, 1), // 25 ALU_INSTR_IMPL(SUBs, 1), // 25
ALU_INSTR_IMPL(SUB_PREVs, 1), // 26 ALU_INSTR_IMPL(SUB_PREVs, 1), // 26
ALU_INSTR_IMPL(PRED_SETEs, 1), // 27 ALU_INSTR_IMPL(PRED_SETEs, 1), // 27

View File

@ -55,8 +55,8 @@ class GL4ShaderTranslator {
va_end(args); va_end(args);
} }
void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, void AppendSrcReg(const ucode::instr_alu_t& op, uint32_t num, uint32_t type,
uint32_t abs); uint32_t swiz, uint32_t negate);
void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
uint32_t abs); uint32_t abs);
void PrintVectorDstReg(const ucode::instr_alu_t& alu); void PrintVectorDstReg(const ucode::instr_alu_t& alu);
@ -92,7 +92,8 @@ class GL4ShaderTranslator {
bool TranslateALU_PRED_SETNE_PUSHv(const ucode::instr_alu_t& alu); bool TranslateALU_PRED_SETNE_PUSHv(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETGT_PUSHv(const ucode::instr_alu_t& alu); bool TranslateALU_PRED_SETGT_PUSHv(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETGTE_PUSHv(const ucode::instr_alu_t& alu); bool TranslateALU_PRED_SETGTE_PUSHv(const ucode::instr_alu_t& alu);
// ... bool TranslateALU_DSTv(const ucode::instr_alu_t& alu);
bool TranslateALU_MOVAv(const ucode::instr_alu_t& alu);
bool TranslateALU_ADDs(const ucode::instr_alu_t& alu); bool TranslateALU_ADDs(const ucode::instr_alu_t& alu);
bool TranslateALU_ADD_PREVs(const ucode::instr_alu_t& alu); bool TranslateALU_ADD_PREVs(const ucode::instr_alu_t& alu);
bool TranslateALU_MULs(const ucode::instr_alu_t& alu); bool TranslateALU_MULs(const ucode::instr_alu_t& alu);
@ -109,6 +110,7 @@ class GL4ShaderTranslator {
bool TranslateALU_TRUNCs(const ucode::instr_alu_t& alu); bool TranslateALU_TRUNCs(const ucode::instr_alu_t& alu);
bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu); bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu);
bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_LOG_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu); bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu); bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu);
@ -116,7 +118,8 @@ class GL4ShaderTranslator {
bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu); bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu); bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu);
// ... bool TranslateALU_MOVAs(const ucode::instr_alu_t& alu);
bool TranslateALU_MOVA_FLOORs(const ucode::instr_alu_t& alu);
bool TranslateALU_SUBs(const ucode::instr_alu_t& alu); bool TranslateALU_SUBs(const ucode::instr_alu_t& alu);
bool TranslateALU_SUB_PREVs(const ucode::instr_alu_t& alu); bool TranslateALU_SUB_PREVs(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETXXs(const ucode::instr_alu_t& alu, const char* op); bool TranslateALU_PRED_SETXXs(const ucode::instr_alu_t& alu, const char* op);

View File

@ -32,6 +32,10 @@ namespace ucode {
#define XEPACKEDUNION(name, value) union __attribute__((packed)) name #define XEPACKEDUNION(name, value) union __attribute__((packed)) name
#endif // MSVC #endif // MSVC
// Closest AMD doc:
// http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
// Microcode format differs, but most fields/enums are the same.
// This code comes from the freedreno project: // This code comes from the freedreno project:
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h // https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
/* /*