[D3D12] DXBC: Skip loading and some ALU ops for identical operands
This commit is contained in:
parent
352a443c67
commit
1ee3ed03fd
|
@ -10850,10 +10850,30 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
// while storing.
|
// while storing.
|
||||||
bool replicate_result = false;
|
bool replicate_result = false;
|
||||||
|
|
||||||
|
// A small shortcut, operands of cube are the same, but swizzled.
|
||||||
|
uint32_t operand_count;
|
||||||
|
if (instr.vector_opcode == AluVectorOpcode::kCube) {
|
||||||
|
operand_count = 1;
|
||||||
|
} else {
|
||||||
|
operand_count = uint32_t(instr.operand_count);
|
||||||
|
}
|
||||||
DxbcSourceOperand dxbc_operands[3];
|
DxbcSourceOperand dxbc_operands[3];
|
||||||
|
// Whether the operand is the same as any previous operand, and thus is loaded
|
||||||
|
// only once.
|
||||||
|
bool operands_duplicate[3] = {};
|
||||||
uint32_t operand_length_sums[3];
|
uint32_t operand_length_sums[3];
|
||||||
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
for (uint32_t i = 0; i < operand_count; ++i) {
|
||||||
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
|
const InstructionOperand& operand = instr.operands[i];
|
||||||
|
for (uint32_t j = 0; j < i; ++j) {
|
||||||
|
if (operand == instr.operands[j]) {
|
||||||
|
operands_duplicate[i] = true;
|
||||||
|
dxbc_operands[i] = dxbc_operands[j];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!operands_duplicate[i]) {
|
||||||
|
LoadDxbcSourceOperand(operand, dxbc_operands[i]);
|
||||||
|
}
|
||||||
operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]);
|
operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]);
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
operand_length_sums[i] += operand_length_sums[i - 1];
|
operand_length_sums[i] += operand_length_sums[i - 1];
|
||||||
|
@ -10896,12 +10916,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
|
|
||||||
switch (instr.vector_opcode) {
|
switch (instr.vector_opcode) {
|
||||||
case AluVectorOpcode::kAdd:
|
case AluVectorOpcode::kAdd:
|
||||||
case AluVectorOpcode::kMax:
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
// max is commonly used as mov, but probably better not to convert it to
|
|
||||||
// make sure things like flushing denormals aren't affected.
|
|
||||||
case AluVectorOpcode::kMin:
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
|
||||||
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
3 + operand_length_sums[1]));
|
3 + operand_length_sums[1]));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -10924,6 +10939,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[1]);
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
|
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
|
||||||
// flushing denormals (must be done using eq - doing bitwise comparison
|
// flushing denormals (must be done using eq - doing bitwise comparison
|
||||||
// doesn't flush denormals).
|
// doesn't flush denormals).
|
||||||
|
@ -10931,8 +10947,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
// image missing because rcp(0) is multiplied by 0, which results in NaN
|
// image missing because rcp(0) is multiplied by 0, which results in NaN
|
||||||
// rather than 0.
|
// rather than 0.
|
||||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||||
// Get the non-NaN multiplicand closer to zero to check if any of them is
|
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||||
// zero.
|
// is zero.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -10947,7 +10963,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Check if any multiplicand is zero (min isn't required to flush
|
// Check if any multiplicand is zero (min isn't required to flush
|
||||||
// denormals in the result).
|
// denormals in the result).
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
@ -10964,7 +10981,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Zero the result if any multiplicand is zero.
|
// Zero the result if any multiplicand is zero.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
@ -10985,8 +11003,39 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
case AluVectorOpcode::kMax:
|
||||||
|
case AluVectorOpcode::kMin:
|
||||||
|
// max is commonly used as mov.
|
||||||
|
if (operands_duplicate[1]) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[0]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
} else {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case AluVectorOpcode::kSeq:
|
case AluVectorOpcode::kSeq:
|
||||||
case AluVectorOpcode::kSgt:
|
case AluVectorOpcode::kSgt:
|
||||||
case AluVectorOpcode::kSge:
|
case AluVectorOpcode::kSge:
|
||||||
|
@ -11054,11 +11103,12 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[2]);
|
UseDxbcSourceOperand(dxbc_operands[2]);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||||
// If any operand is zero or denormalized, just leave the addition part.
|
// If any operand is zero or denormalized, just leave the addition part.
|
||||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||||
// Get the non-NaN multiplicand closer to zero to check if any of them is
|
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||||
// zero.
|
// is zero.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -11073,7 +11123,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Check if any multiplicand is zero (min isn't required to flush
|
// Check if any multiplicand is zero (min isn't required to flush
|
||||||
// denormals in the result).
|
// denormals in the result).
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
@ -11108,6 +11159,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
// Using true eq to compare with zero because it handles denormals and -0.
|
// Using true eq to compare with zero because it handles denormals and -0.
|
||||||
|
@ -11158,6 +11210,22 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
case AluVectorOpcode::kDp4:
|
case AluVectorOpcode::kDp4:
|
||||||
case AluVectorOpcode::kDp3:
|
case AluVectorOpcode::kDp3:
|
||||||
case AluVectorOpcode::kDp2Add: {
|
case AluVectorOpcode::kDp2Add: {
|
||||||
|
if (instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||||
|
// The operands are the same when calculating vector length, no need to
|
||||||
|
// emulate 0 * anything = 0 in this case.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
} else {
|
||||||
uint32_t operand_mask;
|
uint32_t operand_mask;
|
||||||
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
||||||
operand_mask = 0b0011;
|
operand_mask = 0b0011;
|
||||||
|
@ -11187,7 +11255,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(0);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
10 + operand_length_sums[0]));
|
10 + operand_length_sums[0]));
|
||||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
@ -11241,7 +11310,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Calculate the dot product.
|
// Calculate the dot product.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -11257,6 +11327,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Release src1_temp.
|
// Release src1_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
// Add src2.x for dp2add.
|
// Add src2.x for dp2add.
|
||||||
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -11839,12 +11910,13 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
|
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||||
// This is an attenuation calculation function, so infinity is probably
|
// This is an attenuation calculation function, so infinity is probably
|
||||||
// not very unlikely.
|
// not very unlikely.
|
||||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||||
// Get the non-NaN multiplicand closer to zero to check if any of them is
|
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||||
// zero.
|
// is zero.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -11873,7 +11945,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Set pv.y to zero if any multiplicand is zero.
|
// Set pv.y to zero if any multiplicand is zero.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||||
|
@ -11891,6 +11964,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
// pv.z = src0.z
|
// pv.z = src0.z
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -11992,7 +12066,20 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.conversion_instruction_count;
|
++stat_.conversion_instruction_count;
|
||||||
// The `pv = max(src0, src1)` part.
|
// The `pv = max(src0, src1)` part.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
if (operands_duplicate[1]) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[0]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
} else {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
3 + operand_length_sums[1]));
|
3 + operand_length_sums[1]));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -12002,6 +12089,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[1]);
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -12010,8 +12098,11 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
for (uint32_t i = 0; i < operand_count; ++i) {
|
||||||
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]);
|
uint32_t operand_index = operand_count - 1 - i;
|
||||||
|
if (!operands_duplicate[operand_index]) {
|
||||||
|
UnloadDxbcSourceOperand(dxbc_operands[operand_index]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StoreResult(instr.result, system_temp_pv_, replicate_result);
|
StoreResult(instr.result, system_temp_pv_, replicate_result);
|
||||||
|
@ -12036,9 +12127,22 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
bool predicate_written = false;
|
bool predicate_written = false;
|
||||||
|
|
||||||
DxbcSourceOperand dxbc_operands[3];
|
DxbcSourceOperand dxbc_operands[3];
|
||||||
|
// Whether the operand is the same as any previous operand, and thus is loaded
|
||||||
|
// only once.
|
||||||
|
bool operands_duplicate[3] = {};
|
||||||
uint32_t operand_lengths[3];
|
uint32_t operand_lengths[3];
|
||||||
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||||
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
|
const InstructionOperand& operand = instr.operands[i];
|
||||||
|
for (uint32_t j = 0; j < i; ++j) {
|
||||||
|
if (operand == instr.operands[j]) {
|
||||||
|
operands_duplicate[i] = true;
|
||||||
|
dxbc_operands[i] = dxbc_operands[j];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!operands_duplicate[i]) {
|
||||||
|
LoadDxbcSourceOperand(operand, dxbc_operands[i]);
|
||||||
|
}
|
||||||
operand_lengths[i] = DxbcSourceOperandLength(dxbc_operands[i]);
|
operand_lengths[i] = DxbcSourceOperandLength(dxbc_operands[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12098,8 +12202,6 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
|
|
||||||
switch (instr.scalar_opcode) {
|
switch (instr.scalar_opcode) {
|
||||||
case AluScalarOpcode::kAdds:
|
case AluScalarOpcode::kAdds:
|
||||||
case AluScalarOpcode::kMaxs:
|
|
||||||
case AluScalarOpcode::kMins:
|
|
||||||
case AluScalarOpcode::kSubs: {
|
case AluScalarOpcode::kSubs: {
|
||||||
bool subtract = instr.scalar_opcode == AluScalarOpcode::kSubs;
|
bool subtract = instr.scalar_opcode == AluScalarOpcode::kSubs;
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -12144,14 +12246,16 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
if (instr.operands[0].components[0] != instr.operands[0].components[1]) {
|
||||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||||
// Get the non-NaN multiplicand closer to zero to check if any of them is
|
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||||
// zero.
|
// is zero.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
3 + 2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
|
3 +
|
||||||
|
2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
shader_code_.push_back(is_subnormal_temp);
|
shader_code_.push_back(is_subnormal_temp);
|
||||||
|
@ -12175,7 +12279,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Zero the result if any multiplicand is zero.
|
// Zero the result if any multiplicand is zero.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
@ -12193,6 +12298,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case AluScalarOpcode::kMulsPrev: {
|
case AluScalarOpcode::kMulsPrev: {
|
||||||
|
@ -12382,6 +12488,36 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
case AluScalarOpcode::kMaxs:
|
||||||
|
case AluScalarOpcode::kMins: {
|
||||||
|
// max is commonly used as mov.
|
||||||
|
if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
|
||||||
|
operand_lengths[0]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
} else {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
kCoreOpcodes[uint32_t(instr.scalar_opcode)]) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + 2 * operand_lengths[0]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
|
||||||
case AluScalarOpcode::kSeqs:
|
case AluScalarOpcode::kSeqs:
|
||||||
case AluScalarOpcode::kSgts:
|
case AluScalarOpcode::kSgts:
|
||||||
case AluScalarOpcode::kSges:
|
case AluScalarOpcode::kSges:
|
||||||
|
@ -12666,7 +12802,20 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.conversion_instruction_count;
|
++stat_.conversion_instruction_count;
|
||||||
// The `ps = max(src0.x, src0.y)` part.
|
// The `ps = max(src0.x, src0.y)` part.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
|
||||||
|
operand_lengths[0]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
} else {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
3 + 2 * operand_lengths[0]));
|
3 + 2 * operand_lengths[0]));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -12676,6 +12825,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AluScalarOpcode::kSubsPrev:
|
case AluScalarOpcode::kSubsPrev:
|
||||||
|
@ -12970,10 +13120,11 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
|
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||||
// Get the non-NaN multiplicand closer to zero to check if any of them is
|
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||||
// zero.
|
// is zero.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -13002,7 +13153,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
// Zero the result if any multiplicand is zero.
|
// Zero the result if any multiplicand is zero.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
@ -13020,6 +13172,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case AluScalarOpcode::kAddsc0:
|
case AluScalarOpcode::kAddsc0:
|
||||||
|
|
|
@ -180,6 +180,28 @@ struct InstructionOperand {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Whether absolute values of two operands are identical (useful for emulating
|
||||||
|
// Shader Model 3 0*anything=0 multiplication behavior).
|
||||||
|
bool EqualsAbsolute(const InstructionOperand& other) const {
|
||||||
|
if (storage_source != other.storage_source ||
|
||||||
|
storage_index != other.storage_index ||
|
||||||
|
storage_addressing_mode != other.storage_addressing_mode ||
|
||||||
|
component_count != other.component_count) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < component_count; ++i) {
|
||||||
|
if (components[i] != other.components[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Full operand equality: same storage and swizzle (as checked by
// EqualsAbsolute) and, additionally, the same negation and absolute value
// modifiers.
bool operator==(const InstructionOperand& other) const {
  if (!EqualsAbsolute(other)) {
    return false;
  }
  const bool modifiers_match =
      is_negated == other.is_negated &&
      is_absolute_value == other.is_absolute_value;
  return modifiers_match;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ParsedExecInstruction {
|
struct ParsedExecInstruction {
|
||||||
|
|
Loading…
Reference in New Issue