[D3D12] DXBC: Skip loading and some ALU ops for identical operands

This commit is contained in:
Triang3l 2018-12-09 00:20:13 +03:00
parent 352a443c67
commit 1ee3ed03fd
2 changed files with 571 additions and 396 deletions

View File

@ -10850,10 +10850,30 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
// while storing. // while storing.
bool replicate_result = false; bool replicate_result = false;
// A small shortcut, operands of cube are the same, but swizzled.
uint32_t operand_count;
if (instr.vector_opcode == AluVectorOpcode::kCube) {
operand_count = 1;
} else {
operand_count = uint32_t(instr.operand_count);
}
DxbcSourceOperand dxbc_operands[3]; DxbcSourceOperand dxbc_operands[3];
// Whether the operand is the same as any previous operand, and thus is loaded
// only once.
bool operands_duplicate[3] = {};
uint32_t operand_length_sums[3]; uint32_t operand_length_sums[3];
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { for (uint32_t i = 0; i < operand_count; ++i) {
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]); const InstructionOperand& operand = instr.operands[i];
for (uint32_t j = 0; j < i; ++j) {
if (operand == instr.operands[j]) {
operands_duplicate[i] = true;
dxbc_operands[i] = dxbc_operands[j];
break;
}
}
if (!operands_duplicate[i]) {
LoadDxbcSourceOperand(operand, dxbc_operands[i]);
}
operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]); operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]);
if (i != 0) { if (i != 0) {
operand_length_sums[i] += operand_length_sums[i - 1]; operand_length_sums[i] += operand_length_sums[i - 1];
@ -10896,12 +10916,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
switch (instr.vector_opcode) { switch (instr.vector_opcode) {
case AluVectorOpcode::kAdd: case AluVectorOpcode::kAdd:
case AluVectorOpcode::kMax: shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
// max is commonly used as mov, but probably better not to convert it to
// make sure things like flushing denormals aren't affected.
case AluVectorOpcode::kMin:
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[1])); 3 + operand_length_sums[1]));
shader_code_.push_back( shader_code_.push_back(
@ -10924,6 +10939,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1]); UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0), // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
// flushing denormals (must be done using eq - doing bitwise comparison // flushing denormals (must be done using eq - doing bitwise comparison
// doesn't flush denormals). // doesn't flush denormals).
@ -10931,8 +10947,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
// image missing because rcp(0) is multiplied by 0, which results in NaN // image missing because rcp(0) is multiplied by 0, which results in NaN
// rather than 0. // rather than 0.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is // Get the non-NaN multiplicand closer to zero to check if any of them
// zero. // is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -10947,7 +10963,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush // Check if any multiplicand is zero (min isn't required to flush
// denormals in the result). // denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -10964,7 +10981,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero. // Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -10985,8 +11003,39 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
}
} break; } break;
case AluVectorOpcode::kMax:
case AluVectorOpcode::kMin:
// max is commonly used as mov.
if (operands_duplicate[1]) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[0]);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[1]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[0]);
UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
break;
case AluVectorOpcode::kSeq: case AluVectorOpcode::kSeq:
case AluVectorOpcode::kSgt: case AluVectorOpcode::kSgt:
case AluVectorOpcode::kSge: case AluVectorOpcode::kSge:
@ -11054,11 +11103,12 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[2]); UseDxbcSourceOperand(dxbc_operands[2]);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// If any operand is zero or denormalized, just leave the addition part. // If any operand is zero or denormalized, just leave the addition part.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is // Get the non-NaN multiplicand closer to zero to check if any of them
// zero. // is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11073,7 +11123,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush // Check if any multiplicand is zero (min isn't required to flush
// denormals in the result). // denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -11108,6 +11159,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
}
} break; } break;
// Using true eq to compare with zero because it handles denormals and -0. // Using true eq to compare with zero because it handles denormals and -0.
@ -11158,6 +11210,22 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kDp4: case AluVectorOpcode::kDp4:
case AluVectorOpcode::kDp3: case AluVectorOpcode::kDp3:
case AluVectorOpcode::kDp2Add: { case AluVectorOpcode::kDp2Add: {
if (instr.operands[0].EqualsAbsolute(instr.operands[1])) {
// The operands are the same when calculating vector length, no need to
// emulate 0 * anything = 0 in this case.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[1]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[0]);
UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count;
++stat_.float_instruction_count;
} else {
uint32_t operand_mask; uint32_t operand_mask;
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) { if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
operand_mask = 0b0011; operand_mask = 0b0011;
@ -11187,7 +11255,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(0); shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
10 + operand_length_sums[0])); 10 + operand_length_sums[0]));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
@ -11241,7 +11310,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Calculate the dot product. // Calculate the dot product.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) | kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
@ -11257,6 +11327,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Release src1_temp. // Release src1_temp.
PopSystemTemp(); PopSystemTemp();
}
// Add src2.x for dp2add. // Add src2.x for dp2add.
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) { if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
shader_code_.push_back( shader_code_.push_back(
@ -11839,12 +11910,13 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// This is an attenuation calculation function, so infinity is probably // This is an attenuation calculation function, so infinity is probably
// not very unlikely. // not very unlikely.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is // Get the non-NaN multiplicand closer to zero to check if any of them
// zero. // is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11873,7 +11945,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Set pv.y to zero if any multiplicand is zero. // Set pv.y to zero if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
@ -11891,6 +11964,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
}
// pv.z = src0.z // pv.z = src0.z
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11992,7 +12066,20 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.conversion_instruction_count; ++stat_.conversion_instruction_count;
// The `pv = max(src0, src1)` part. // The `pv = max(src0, src1)` part.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | if (operands_duplicate[1]) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[0]);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[1])); 3 + operand_length_sums[1]));
shader_code_.push_back( shader_code_.push_back(
@ -12002,6 +12089,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1]); UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
}
break; break;
default: default:
@ -12010,8 +12098,11 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
break; break;
} }
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { for (uint32_t i = 0; i < operand_count; ++i) {
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]); uint32_t operand_index = operand_count - 1 - i;
if (!operands_duplicate[operand_index]) {
UnloadDxbcSourceOperand(dxbc_operands[operand_index]);
}
} }
StoreResult(instr.result, system_temp_pv_, replicate_result); StoreResult(instr.result, system_temp_pv_, replicate_result);
@ -12036,9 +12127,22 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
bool predicate_written = false; bool predicate_written = false;
DxbcSourceOperand dxbc_operands[3]; DxbcSourceOperand dxbc_operands[3];
// Whether the operand is the same as any previous operand, and thus is loaded
// only once.
bool operands_duplicate[3] = {};
uint32_t operand_lengths[3]; uint32_t operand_lengths[3];
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]); const InstructionOperand& operand = instr.operands[i];
for (uint32_t j = 0; j < i; ++j) {
if (operand == instr.operands[j]) {
operands_duplicate[i] = true;
dxbc_operands[i] = dxbc_operands[j];
break;
}
}
if (!operands_duplicate[i]) {
LoadDxbcSourceOperand(operand, dxbc_operands[i]);
}
operand_lengths[i] = DxbcSourceOperandLength(dxbc_operands[i]); operand_lengths[i] = DxbcSourceOperandLength(dxbc_operands[i]);
} }
@ -12098,8 +12202,6 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
switch (instr.scalar_opcode) { switch (instr.scalar_opcode) {
case AluScalarOpcode::kAdds: case AluScalarOpcode::kAdds:
case AluScalarOpcode::kMaxs:
case AluScalarOpcode::kMins:
case AluScalarOpcode::kSubs: { case AluScalarOpcode::kSubs: {
bool subtract = instr.scalar_opcode == AluScalarOpcode::kSubs; bool subtract = instr.scalar_opcode == AluScalarOpcode::kSubs;
shader_code_.push_back( shader_code_.push_back(
@ -12144,14 +12246,16 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (instr.operands[0].components[0] != instr.operands[0].components[1]) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is // Get the non-NaN multiplicand closer to zero to check if any of them
// zero. // is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + 2 * DxbcSourceOperandLength(dxbc_operands[0], false, true))); 3 +
2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
@ -12175,7 +12279,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero. // Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
@ -12193,6 +12298,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
}
} break; } break;
case AluScalarOpcode::kMulsPrev: { case AluScalarOpcode::kMulsPrev: {
@ -12382,6 +12488,36 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
PopSystemTemp(); PopSystemTemp();
} break; } break;
case AluScalarOpcode::kMaxs:
case AluScalarOpcode::kMins: {
// max is commonly used as mov.
if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
operand_lengths[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.scalar_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + 2 * operand_lengths[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
} break;
case AluScalarOpcode::kSeqs: case AluScalarOpcode::kSeqs:
case AluScalarOpcode::kSgts: case AluScalarOpcode::kSgts:
case AluScalarOpcode::kSges: case AluScalarOpcode::kSges:
@ -12666,7 +12802,20 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.conversion_instruction_count; ++stat_.conversion_instruction_count;
// The `ps = max(src0.x, src0.y)` part. // The `ps = max(src0.x, src0.y)` part.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
operand_lengths[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + 2 * operand_lengths[0])); 3 + 2 * operand_lengths[0]));
shader_code_.push_back( shader_code_.push_back(
@ -12676,6 +12825,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
}
break; break;
case AluScalarOpcode::kSubsPrev: case AluScalarOpcode::kSubsPrev:
@ -12970,10 +13120,11 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is // Get the non-NaN multiplicand closer to zero to check if any of them
// zero. // is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -13002,7 +13153,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero. // Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
@ -13020,6 +13172,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
}
} break; } break;
case AluScalarOpcode::kAddsc0: case AluScalarOpcode::kAddsc0:

View File

@ -180,6 +180,28 @@ struct InstructionOperand {
} }
return false; return false;
} }
// Whether absolute values of two operands are identical (useful for emulating
// Shader Model 3 0*anything=0 multiplication behavior).
bool EqualsAbsolute(const InstructionOperand& other) const {
if (storage_source != other.storage_source ||
storage_index != other.storage_index ||
storage_addressing_mode != other.storage_addressing_mode ||
component_count != other.component_count) {
return false;
}
for (int i = 0; i < component_count; ++i) {
if (components[i] != other.components[i]) {
return false;
}
}
return true;
}
// Exact operand equality: on top of everything EqualsAbsolute compares, the
// negation and absolute-value modifiers must agree as well.
bool operator==(const InstructionOperand& other) const {
  if (is_negated != other.is_negated) {
    return false;
  }
  if (is_absolute_value != other.is_absolute_value) {
    return false;
  }
  return EqualsAbsolute(other);
}
}; };
struct ParsedExecInstruction { struct ParsedExecInstruction {