Possibly right const addressing and more shader instructions.

This commit is contained in:
Ben Vanik 2015-03-15 11:20:19 -07:00
parent b07cd49281
commit 795df80687
4 changed files with 144 additions and 39 deletions

View File

@ -46,6 +46,7 @@ std::string GL4Shader::GetHeader() {
"#extension GL_ARB_shader_draw_parameters : require\n"
"#extension GL_ARB_shader_storage_buffer_object : require\n"
"#extension GL_ARB_shading_language_420pack : require\n"
"#define FLT_MAX 3.402823466e+38\n"
"precision highp float;\n"
"precision highp int;\n"
"layout(std140, column_major) uniform;\n"

View File

@ -105,6 +105,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
Append(" vec4 pv;\n"); // Previous Vector result.
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
Append(" bool p = false;\n"); // Predicate temp, clause-local.
Append(" int a0 = 0;\n"); // Address register.
// Execute blocks.
const auto& execs = vertex_shader->execs();
@ -141,6 +142,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
Append(" vec4 pv;\n"); // Previous Vector result.
Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV).
Append(" bool p = false;\n"); // Predicate temp, clause-local.
Append(" int a0 = 0;\n"); // Address register.
// Bring registers local.
for (uint32_t n = 0; n < kMaxInterpolators; n++) {
@ -161,9 +163,9 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
return output_.to_string();
}
void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
uint32_t swiz, uint32_t negate,
uint32_t abs_constants) {
void GL4ShaderTranslator::AppendSrcReg(const instr_alu_t& op, uint32_t num,
uint32_t type, uint32_t swiz,
uint32_t negate) {
if (negate) {
Append("-");
}
@ -178,11 +180,24 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
}
} else {
// Constant.
if (abs_constants) {
if (op.abs_constants) {
Append("abs(");
}
Append("state.float_consts[%u]", is_pixel_shader() ? num + 256 : num);
if (abs_constants) {
Append("state.float_consts[");
assert_true(op.const_1_rel_abs == 0);
if (op.const_0_rel_abs) {
if (op.relative_addr) {
assert_true(num < 256);
Append("a0 + %u", is_pixel_shader() ? num + 256 : num);
} else {
Append("a0");
}
} else {
assert_true(num < 256);
Append("%u", is_pixel_shader() ? num + 256 : num);
}
Append("]");
if (op.abs_constants) {
Append(")");
}
}
@ -296,16 +311,16 @@ void GL4ShaderTranslator::AppendVectorOpSrcReg(const ucode::instr_alu_t& op,
int i) {
switch (i) {
case 1:
AppendSrcReg(op.src1_reg, op.src1_sel, op.src1_swiz, op.src1_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src1_reg, op.src1_sel, op.src1_swiz,
op.src1_reg_negate);
break;
case 2:
AppendSrcReg(op.src2_reg, op.src2_sel, op.src2_swiz, op.src2_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src2_reg, op.src2_sel, op.src2_swiz,
op.src2_reg_negate);
break;
case 3:
AppendSrcReg(op.src3_reg, op.src3_sel, op.src3_swiz, op.src3_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src3_reg, op.src3_sel, op.src3_swiz,
op.src3_reg_negate);
break;
}
}
@ -385,16 +400,16 @@ void GL4ShaderTranslator::AppendScalarOpSrcReg(const ucode::instr_alu_t& op,
int i) {
switch (i) {
case 1:
AppendSrcReg(op.src1_reg, op.src1_sel, op.src1_swiz, op.src1_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src1_reg, op.src1_sel, op.src1_swiz,
op.src1_reg_negate);
break;
case 2:
AppendSrcReg(op.src2_reg, op.src2_sel, op.src2_swiz, op.src2_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src2_reg, op.src2_sel, op.src2_swiz,
op.src2_reg_negate);
break;
case 3:
AppendSrcReg(op.src3_reg, op.src3_sel, op.src3_swiz, op.src3_reg_negate,
op.abs_constants);
AppendSrcReg(op, op.src3_reg, op.src3_sel, op.src3_swiz,
op.src3_reg_negate);
break;
}
}
@ -731,7 +746,41 @@ bool GL4ShaderTranslator::TranslateALU_PRED_SETGTE_PUSHv(
return TranslateALU_PRED_SETXX_PUSHv(alu, ">=");
}
// ...
bool GL4ShaderTranslator::TranslateALU_DSTv(const ucode::instr_alu_t& alu) {
BeginAppendVectorOp(alu);
Append("vec4(1.0, (");
AppendVectorOpSrcReg(alu, 1);
Append(".y * ");
AppendVectorOpSrcReg(alu, 1);
Append(".y), ");
AppendVectorOpSrcReg(alu, 1);
Append(".z, ");
AppendVectorOpSrcReg(alu, 2);
Append(".w)");
EndAppendVectorOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_MOVAv(const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendVectorOpSrcReg(alu, 1);
Append(".w + 0.5)), -256, 255);\n");
BeginAppendVectorOp(alu);
if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel &&
alu.src1_swiz == alu.src2_swiz &&
alu.src1_reg_negate == alu.src2_reg_negate) {
// This is a mov.
AppendVectorOpSrcReg(alu, 1);
} else {
Append("max(");
AppendVectorOpSrcReg(alu, 1);
Append(", ");
AppendVectorOpSrcReg(alu, 2);
Append(")");
}
EndAppendVectorOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu);
@ -745,8 +794,8 @@ bool GL4ShaderTranslator::TranslateALU_ADDs(const instr_alu_t& alu) {
bool GL4ShaderTranslator::TranslateALU_ADD_PREVs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu);
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants);
AppendSrcReg(alu, alu.src3_reg, alu.src3_sel, alu.src3_swiz,
alu.src3_reg_negate);
Append(".x + ps");
EndAppendScalarOp(alu);
return true;
@ -764,8 +813,8 @@ bool GL4ShaderTranslator::TranslateALU_MULs(const instr_alu_t& alu) {
bool GL4ShaderTranslator::TranslateALU_MUL_PREVs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu);
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants);
AppendSrcReg(alu, alu.src3_reg, alu.src3_sel, alu.src3_swiz,
alu.src3_reg_negate);
Append(".x * ps");
EndAppendScalarOp(alu);
return true;
@ -858,6 +907,17 @@ bool GL4ShaderTranslator::TranslateALU_EXP_IEEE(const instr_alu_t& alu) {
return true;
}
bool GL4ShaderTranslator::TranslateALU_LOG_CLAMP(
const ucode::instr_alu_t& alu) {
Append(" ps = log2(");
AppendScalarOpSrcReg(alu, 3);
Append(".x);");
BeginAppendScalarOp(alu);
Append("isinf(ps) && ps < 0.0 ? -FLT_MAX : ps");
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) {
BeginAppendScalarOp(alu);
Append("log2(");
@ -929,7 +989,44 @@ bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE(
return true;
}
// ...
bool GL4ShaderTranslator::TranslateALU_MOVAs(const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendScalarOpSrcReg(alu, 3);
Append(".x + 0.5)), -256, 255);\n");
BeginAppendScalarOp(alu);
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov.
AppendScalarOpSrcReg(alu, 3);
} else {
Append("max(");
AppendScalarOpSrcReg(alu, 3);
Append(".x, ");
AppendScalarOpSrcReg(alu, 3);
Append(".y)");
}
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_MOVA_FLOORs(
const ucode::instr_alu_t& alu) {
Append(" a0 = clamp(int(floor(");
AppendScalarOpSrcReg(alu, 3);
Append(".x)), -256, 255);\n");
BeginAppendScalarOp(alu);
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov.
AppendScalarOpSrcReg(alu, 3);
} else {
Append("max(");
AppendScalarOpSrcReg(alu, 3);
Append(".x, ");
AppendScalarOpSrcReg(alu, 3);
Append(".y)");
}
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_SUBs(const instr_alu_t& alu) {
BeginAppendScalarOp(alu);
@ -1015,9 +1112,9 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c * ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu);
return true;
@ -1033,9 +1130,9 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c + ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu);
return true;
@ -1051,9 +1148,9 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 =
(alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate);
Append(".%c - ", chan_names[swiz_a]);
AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants);
AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate);
Append(".%c", chan_names[swiz_b]);
EndAppendScalarOp(alu);
return true;
@ -1132,8 +1229,8 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR(KILLGTv, 2), // 25
ALU_INSTR(KILLGTEv, 2), // 26
ALU_INSTR(KILLNEv, 2), // 27
ALU_INSTR(DSTv, 2), // 28
ALU_INSTR(MOVAv, 1), // 29
ALU_INSTR_IMPL(DSTv, 2), // 28
ALU_INSTR_IMPL(MOVAv, 1), // 29
};
static TranslateInfo scalar_alu_instrs[0x40] = {
ALU_INSTR_IMPL(ADDs, 1), // 0
@ -1151,7 +1248,7 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(TRUNCs, 1), // 12
ALU_INSTR_IMPL(FLOORs, 1), // 13
ALU_INSTR_IMPL(EXP_IEEE, 1), // 14
ALU_INSTR(LOG_CLAMP, 1), // 15
ALU_INSTR_IMPL(LOG_CLAMP, 1), // 15
ALU_INSTR_IMPL(LOG_IEEE, 1), // 16
ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17
ALU_INSTR_IMPL(RECIP_FF, 1), // 18
@ -1159,8 +1256,8 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20
ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21
ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22
ALU_INSTR(MOVAs, 1), // 23
ALU_INSTR(MOVA_FLOORs, 1), // 24
ALU_INSTR_IMPL(MOVAs, 1), // 23
ALU_INSTR_IMPL(MOVA_FLOORs, 1), // 24
ALU_INSTR_IMPL(SUBs, 1), // 25
ALU_INSTR_IMPL(SUB_PREVs, 1), // 26
ALU_INSTR_IMPL(PRED_SETEs, 1), // 27

View File

@ -55,8 +55,8 @@ class GL4ShaderTranslator {
va_end(args);
}
void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
uint32_t abs);
void AppendSrcReg(const ucode::instr_alu_t& op, uint32_t num, uint32_t type,
uint32_t swiz, uint32_t negate);
void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
uint32_t abs);
void PrintVectorDstReg(const ucode::instr_alu_t& alu);
@ -92,7 +92,8 @@ class GL4ShaderTranslator {
bool TranslateALU_PRED_SETNE_PUSHv(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETGT_PUSHv(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETGTE_PUSHv(const ucode::instr_alu_t& alu);
// ...
bool TranslateALU_DSTv(const ucode::instr_alu_t& alu);
bool TranslateALU_MOVAv(const ucode::instr_alu_t& alu);
bool TranslateALU_ADDs(const ucode::instr_alu_t& alu);
bool TranslateALU_ADD_PREVs(const ucode::instr_alu_t& alu);
bool TranslateALU_MULs(const ucode::instr_alu_t& alu);
@ -109,6 +110,7 @@ class GL4ShaderTranslator {
bool TranslateALU_TRUNCs(const ucode::instr_alu_t& alu);
bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu);
bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_LOG_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu);
@ -116,7 +118,8 @@ class GL4ShaderTranslator {
bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu);
// ...
bool TranslateALU_MOVAs(const ucode::instr_alu_t& alu);
bool TranslateALU_MOVA_FLOORs(const ucode::instr_alu_t& alu);
bool TranslateALU_SUBs(const ucode::instr_alu_t& alu);
bool TranslateALU_SUB_PREVs(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETXXs(const ucode::instr_alu_t& alu, const char* op);

View File

@ -32,6 +32,10 @@ namespace ucode {
#define XEPACKEDUNION(name, value) union __attribute__((packed)) name
#endif // MSVC
// Closest AMD doc:
// http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
// Microcode format differs, but most fields/enums are the same.
// This code comes from the freedreno project:
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
/*