Finally figured out MUL/ADD/SUB_CONST_*

This commit is contained in:
Ben Vanik 2013-11-16 18:27:17 -08:00
parent 1592cf96f7
commit 9441fb8b7a
2 changed files with 145 additions and 22 deletions

View File

@ -1002,7 +1002,7 @@ int TranslateALU_MAXs(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
ctx.output->append(" = ");
if (alu.vector_clamp) {
if (alu.scalar_clamp) {
ctx.output->append("saturate(");
}
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
@ -1015,7 +1015,7 @@ int TranslateALU_MAXs(
AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".y).xxxx");
}
if (alu.vector_clamp) {
if (alu.scalar_clamp) {
ctx.output->append(")");
}
ctx.output->append(";\n");
@ -1027,7 +1027,7 @@ int TranslateALU_MINs(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
ctx.output->append(" = ");
if (alu.vector_clamp) {
if (alu.scalar_clamp) {
ctx.output->append("saturate(");
}
ctx.output->append("min(");
@ -1035,7 +1035,7 @@ int TranslateALU_MINs(
ctx.output->append(".x, ");
AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".y).xxxx");
if (alu.vector_clamp) {
if (alu.scalar_clamp) {
ctx.output->append(")");
}
ctx.output->append(";\n");
@ -1043,6 +1043,93 @@ int TranslateALU_MINs(
return 0;
}
int TranslateALU_MUL_CONST_0(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
ctx.output->append(" = ");
if (alu.scalar_clamp) {
ctx.output->append("saturate(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
ctx.output->append("(");
AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c * ", chan_names[swiz_a]);
AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c", chan_names[swiz_b]);
ctx.output->append(").xxxx");
if (alu.scalar_clamp) {
ctx.output->append(")");
}
ctx.output->append(";\n");
AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
return 0;
}
int TranslateALU_MUL_CONST_1(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
return TranslateALU_MUL_CONST_0(ctx, alu);
}
int TranslateALU_ADD_CONST_0(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
ctx.output->append(" = ");
if (alu.scalar_clamp) {
ctx.output->append("saturate(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
ctx.output->append("(");
AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c + ", chan_names[swiz_a]);
AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c", chan_names[swiz_b]);
ctx.output->append(").xxxx");
if (alu.scalar_clamp) {
ctx.output->append(")");
}
ctx.output->append(";\n");
AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
return 0;
}
int TranslateALU_ADD_CONST_1(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
return TranslateALU_ADD_CONST_0(ctx, alu);
}
int TranslateALU_SUB_CONST_0(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
ctx.output->append(" = ");
if (alu.scalar_clamp) {
ctx.output->append("saturate(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
uint32_t swiz_b = (src3_swiz & 0x3);
uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
ctx.output->append("(");
AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c - ", chan_names[swiz_a]);
AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
ctx.output->append(".%c", chan_names[swiz_b]);
ctx.output->append(").xxxx");
if (alu.scalar_clamp) {
ctx.output->append(")");
}
ctx.output->append(";\n");
AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
return 0;
}
int TranslateALU_SUB_CONST_1(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
return TranslateALU_SUB_CONST_0(ctx, alu);
}
// Function-pointer type matching the signature of the TranslateALU_*
// functions: takes the translation context and the ALU instruction to
// translate, and returns an int status (the translators visible here
// return 0).
typedef int (*xe_gpu_translate_alu_fn)(
xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu);
typedef struct {
@ -1129,12 +1216,12 @@ static xe_gpu_translate_alu_info_t scalar_alu_instrs[0x40] = {
ALU_INSTR(KILLONEs, 1), // 39
ALU_INSTR(SQRT_IEEE, 1), // 40
{ 0, 0, false },
ALU_INSTR(MUL_CONST_0, 2), // 42
ALU_INSTR(MUL_CONST_1, 2), // 43
ALU_INSTR(ADD_CONST_0, 2), // 44
ALU_INSTR(ADD_CONST_1, 2), // 45
ALU_INSTR(SUB_CONST_0, 2), // 46
ALU_INSTR(SUB_CONST_1, 2), // 47
ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42
ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43
ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44
ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45
ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46
ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47
ALU_INSTR(SIN, 1), // 48
ALU_INSTR(COS, 1), // 49
ALU_INSTR(RETAIN_PREV, 1), // 50
@ -1213,10 +1300,26 @@ int TranslateALU(
print_dstreg(output,
alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
output->append(" = ");
if (is.num_srcs == 2) {
// ADD_CONST_0 dest, [const], [reg]
uint32_t src3_swiz = alu->src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
uint32_t swiz_b = (src3_swiz & 0x3);
print_srcreg(output,
alu->src3_reg, 0, 0,
alu->src3_reg_negate, alu->src3_reg_abs);
output->append(".%c", chan_names[swiz_a]);
output->append(", ");
uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1);
print_srcreg(output,
reg2, 1, 0,
alu->src3_reg_negate, alu->src3_reg_abs);
output->append(".%c", chan_names[swiz_b]);
} else {
print_srcreg(output,
alu->src3_reg, alu->src3_sel, alu->src3_swiz,
alu->src3_reg_negate, alu->src3_reg_abs);
// TODO ADD/MUL must have another src?!?
}
if (alu->scalar_clamp) {
output->append(" CLAMP");
}

View File

@ -303,22 +303,42 @@ int disasm_alu(
if (alu->scalar_write_mask || !alu->vector_write_mask) {
// 2nd optional scalar op:
if (alu->vector_write_mask) {
output->append("%s", levels[level]);
output->append(" \t\t\t\t\t");
output->append(" \t\t\t\t\t\t \t");
}
if (scalar_instructions[alu->scalar_opc].name) {
output->append("\t \t%s\t", scalar_instructions[alu->scalar_opc].name);
output->append("%s\t", scalar_instructions[alu->scalar_opc].name);
} else {
output->append("\t \tOP(%u)\t", alu->scalar_opc);
output->append("OP(%u)\t", alu->scalar_opc);
}
print_dstreg(output,
alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
output->append(" = ");
if (scalar_instructions[alu->scalar_opc].num_srcs == 2) {
// MUL/ADD/etc
// Clever, CONST_0 and CONST_1 are just an extra storage bit.
// ADD_CONST_0 dest, [const], [reg]
uint32_t src3_swiz = alu->src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
uint32_t swiz_b = (src3_swiz & 0x3);
print_srcreg(output,
alu->src3_reg, 0, 0,
alu->src3_reg_negate, alu->src3_reg_abs);
output->append(".%c", chan_names[swiz_a]);
output->append(", ");
uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1);
print_srcreg(output,
reg2, 1, 0,
alu->src3_reg_negate, alu->src3_reg_abs);
output->append(".%c", chan_names[swiz_b]);
} else {
print_srcreg(output,
alu->src3_reg, alu->src3_sel, alu->src3_swiz,
alu->src3_reg_negate, alu->src3_reg_abs);
// TODO ADD/MUL must have another src?!?
}
if (alu->scalar_clamp) {
output->append(" CLAMP");
}