[DXBC] Fast mul path only for fully identical components because neg is post-abs
This commit is contained in:
parent
feb8258a5e
commit
ae3b68c7b6
|
@ -74,7 +74,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
||||||
uint32_t multiplicands_different =
|
uint32_t multiplicands_different =
|
||||||
used_result_components &
|
used_result_components &
|
||||||
~instr.vector_operands[0].GetIdenticalMultiplicandComponents(
|
~instr.vector_operands[0].GetIdenticalComponents(
|
||||||
instr.vector_operands[1]);
|
instr.vector_operands[1]);
|
||||||
if (multiplicands_different) {
|
if (multiplicands_different) {
|
||||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
|
@ -181,15 +181,14 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
component_count = 4;
|
component_count = 4;
|
||||||
}
|
}
|
||||||
result_swizzle = DxbcSrc::kXXXX;
|
result_swizzle = DxbcSrc::kXXXX;
|
||||||
uint32_t multiplicands_different =
|
uint32_t different = uint32_t((1 << component_count) - 1) &
|
||||||
uint32_t((1 << component_count) - 1) &
|
~instr.vector_operands[0].GetIdenticalComponents(
|
||||||
~instr.vector_operands[0].GetIdenticalMultiplicandComponents(
|
instr.vector_operands[1]);
|
||||||
instr.vector_operands[1]);
|
|
||||||
for (uint32_t i = 0; i < component_count; ++i) {
|
for (uint32_t i = 0; i < component_count; ++i) {
|
||||||
DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
||||||
operands[0].SelectFromSwizzled(i),
|
operands[0].SelectFromSwizzled(i),
|
||||||
operands[1].SelectFromSwizzled(i));
|
operands[1].SelectFromSwizzled(i));
|
||||||
if ((multiplicands_different & (1 << i)) != 0) {
|
if ((different & (1 << i)) != 0) {
|
||||||
// Shader Model 3: +-0 or denormal * anything = +0 (also not replacing
|
// Shader Model 3: +-0 or denormal * anything = +0 (also not replacing
|
||||||
// true `0 + term` with movc of the term because +0 + -0 should result
|
// true `0 + term` with movc of the term because +0 + -0 should result
|
||||||
// in +0, not -0).
|
// in +0, not -0).
|
||||||
|
@ -569,7 +568,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
|
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
|
||||||
operands[0].SelectFromSwizzled(1),
|
operands[0].SelectFromSwizzled(1),
|
||||||
operands[1].SelectFromSwizzled(1));
|
operands[1].SelectFromSwizzled(1));
|
||||||
if (!(instr.vector_operands[0].GetIdenticalMultiplicandComponents(
|
if (!(instr.vector_operands[0].GetIdenticalComponents(
|
||||||
instr.vector_operands[1]) &
|
instr.vector_operands[1]) &
|
||||||
0b0010)) {
|
0b0010)) {
|
||||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
|
@ -987,7 +986,7 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
||||||
case AluScalarOpcode::kMulsc0:
|
case AluScalarOpcode::kMulsc0:
|
||||||
case AluScalarOpcode::kMulsc1:
|
case AluScalarOpcode::kMulsc1:
|
||||||
DxbcOpMul(ps_dest, operand_0_a, operand_1);
|
DxbcOpMul(ps_dest, operand_0_a, operand_1);
|
||||||
if (!(instr.scalar_operands[0].GetIdenticalMultiplicandComponents(
|
if (!(instr.scalar_operands[0].GetIdenticalComponents(
|
||||||
instr.scalar_operands[1]) &
|
instr.scalar_operands[1]) &
|
||||||
0b0001)) {
|
0b0001)) {
|
||||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
|
|
|
@ -212,19 +212,18 @@ struct InstructionOperand {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns which components of two operands are identical, so that
|
// Returns which components of two operands will always be bitwise equal
|
||||||
// multiplication of them would result in pow2 with + sign, including in case
|
// (disregarding component_count for simplicity of usage with GetComponent,
|
||||||
// they're zero (because -0 * |-0|, or -0 * +0, is -0), for providing a fast
|
// treating the rightmost component as replicated). This, strictly with all
|
||||||
// path in emulation of the Shader Model 3 +-0 * x = +0 multiplication
|
// conditions, must be used when emulating Shader Model 3 +-0 * x = +0
|
||||||
// behavior (disregarding component_count for simplicity of usage with
|
// multiplication behavior with IEEE-compliant multiplication (because
|
||||||
// GetComponent, treating the rightmost component as replicated).
|
// -0 * |-0|, or -0 * +0, is -0, while the result must be +0).
|
||||||
uint32_t GetIdenticalMultiplicandComponents(
|
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
||||||
const InstructionOperand& other) const {
|
|
||||||
if (storage_source != other.storage_source ||
|
if (storage_source != other.storage_source ||
|
||||||
storage_index != other.storage_index ||
|
storage_index != other.storage_index ||
|
||||||
storage_addressing_mode != other.storage_addressing_mode ||
|
storage_addressing_mode != other.storage_addressing_mode ||
|
||||||
is_absolute_value != other.is_absolute_value ||
|
is_negated != other.is_negated ||
|
||||||
(!is_absolute_value && is_negated != other.is_negated)) {
|
is_absolute_value != other.is_absolute_value) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
uint32_t identical_components = 0;
|
uint32_t identical_components = 0;
|
||||||
|
@ -234,15 +233,6 @@ struct InstructionOperand {
|
||||||
}
|
}
|
||||||
return identical_components;
|
return identical_components;
|
||||||
}
|
}
|
||||||
// Returns which components of two operands will always be bitwise equal
|
|
||||||
// (disregarding component_count for simplicity of usage with GetComponent,
|
|
||||||
// treating the rightmost component as replicated).
|
|
||||||
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
|
||||||
if (is_negated != other.is_negated) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return GetIdenticalMultiplicandComponents(other);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ParsedExecInstruction {
|
struct ParsedExecInstruction {
|
||||||
|
|
Loading…
Reference in New Issue