[DXBC] `discard` pixels from `kill` with ROV instead of returning

Keep the current lane active as it may be needed for derivatives.
This commit is contained in:
Triang3l 2023-04-09 20:13:22 +03:00
parent 88c645d818
commit 75d805245d
2 changed files with 25 additions and 45 deletions

View File

@ -917,6 +917,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
.SelectFromSwizzled(word_index & 1); .SelectFromSwizzled(word_index & 1);
} }
void KillPixel(bool condition, const dxbc::Src& condition_src);
void ProcessVectorAluOperation(const ParsedAluInstruction& instr, void ProcessVectorAluOperation(const ParsedAluInstruction& instr,
uint32_t& result_swizzle, uint32_t& result_swizzle,
bool& predicate_written); bool& predicate_written);

View File

@ -19,6 +19,20 @@ namespace xe {
namespace gpu { namespace gpu {
using namespace ucode; using namespace ucode;
void DxbcShaderTranslator::KillPixel(bool condition,
const dxbc::Src& condition_src) {
// Discard the pixel, but continue execution if other lanes in the quad need
// this lane for derivatives. The driver may also perform early exiting
// internally if all lanes are discarded if deemed beneficial.
a_.OpDiscard(condition, condition_src);
if (edram_rov_used_) {
// Even though discarding disables all subsequent UAV/ROV writes, also skip
// as much of the Render Backend emulation logic as possible by setting the
// coverage and the mask of the written render targets to zero.
a_.OpMov(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(0));
}
}
void DxbcShaderTranslator::ProcessVectorAluOperation( void DxbcShaderTranslator::ProcessVectorAluOperation(
const ParsedAluInstruction& instr, uint32_t& result_swizzle, const ParsedAluInstruction& instr, uint32_t& result_swizzle,
bool& predicate_written) { bool& predicate_written) {
@ -492,11 +506,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
if (edram_rov_used_) { KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
} else {
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
}
if (used_result_components) { if (used_result_components) {
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@ -512,11 +522,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
if (edram_rov_used_) { KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
} else {
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
}
if (used_result_components) { if (used_result_components) {
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@ -532,11 +538,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
if (edram_rov_used_) { KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
} else {
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
}
if (used_result_components) { if (used_result_components) {
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@ -552,11 +554,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
if (edram_rov_used_) { KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
} else {
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
}
if (used_result_components) { if (used_result_components) {
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@ -952,47 +950,27 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
case AluScalarOpcode::kKillsEq: case AluScalarOpcode::kKillsEq:
a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
if (edram_rov_used_) { KillPixel(true, ps_src);
a_.OpRetC(true, ps_src);
} else {
a_.OpDiscard(true, ps_src);
}
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
break; break;
case AluScalarOpcode::kKillsGt: case AluScalarOpcode::kKillsGt:
a_.OpLT(ps_dest, dxbc::Src::LF(0.0f), operand_0_a); a_.OpLT(ps_dest, dxbc::Src::LF(0.0f), operand_0_a);
if (edram_rov_used_) { KillPixel(true, ps_src);
a_.OpRetC(true, ps_src);
} else {
a_.OpDiscard(true, ps_src);
}
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
break; break;
case AluScalarOpcode::kKillsGe: case AluScalarOpcode::kKillsGe:
a_.OpGE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); a_.OpGE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
if (edram_rov_used_) { KillPixel(true, ps_src);
a_.OpRetC(true, ps_src);
} else {
a_.OpDiscard(true, ps_src);
}
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
break; break;
case AluScalarOpcode::kKillsNe: case AluScalarOpcode::kKillsNe:
a_.OpNE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); a_.OpNE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
if (edram_rov_used_) { KillPixel(true, ps_src);
a_.OpRetC(true, ps_src);
} else {
a_.OpDiscard(true, ps_src);
}
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
break; break;
case AluScalarOpcode::kKillsOne: case AluScalarOpcode::kKillsOne:
a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(1.0f)); a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(1.0f));
if (edram_rov_used_) { KillPixel(true, ps_src);
a_.OpRetC(true, ps_src);
} else {
a_.OpDiscard(true, ps_src);
}
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
break; break;