[D3D12] ROV 7e3 writing

This commit is contained in:
Triang3l 2018-10-13 19:10:25 +03:00
parent c8a48f9c1c
commit 148a713280
1 changed file with 300 additions and 1 deletion

View File

@ -1754,7 +1754,306 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
// Release fixed_temp.
PopSystemTemp();
// TODO(Triang3l): Convert to 7e3.
// ***************************************************************************
// 7e3 conversion begins here.
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// ***************************************************************************
// Check if the target format is 7e3 and the conversion is needed (this is
// pretty long, better to branch here).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(flags_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
uint32_t f10_temp1 = PushSystemTemp(), f10_temp2 = PushSystemTemp();
// Assuming RGB is already clamped to [0.0, 31.875], and alpha is a float and
// already clamped and multiplied by 3 to get [0.0, 3.0].
// Calculate the denormalized value if the numbers are too small to be
// represented as normalized 7e3 into f10_temp1.
// t1 = f32 & 0x7FFFFF
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0x7FFFFF);
shader_code_.push_back(0x7FFFFF);
shader_code_.push_back(0x7FFFFF);
shader_code_.push_back(0x7FFFFF);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// t1 = (f32 & 0x7FFFFF) | 0x800000
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0x800000);
shader_code_.push_back(0x800000);
shader_code_.push_back(0x800000);
shader_code_.push_back(0x800000);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// t2 = f32 >> 23
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp2);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(23);
shader_code_.push_back(23);
shader_code_.push_back(23);
shader_code_.push_back(23);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// t2 = 125 - (f32 >> 23)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp2);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(125);
shader_code_.push_back(125);
shader_code_.push_back(125);
shader_code_.push_back(125);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG));
shader_code_.push_back(f10_temp2);
++stat_.instruction_count;
++stat_.int_instruction_count;
// t1 = ((f32 & 0x7FFFFF) | 0x800000) >> (125 - (f32 >> 23))
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp2);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Check if the numbers are too small to be represented as a normalized 7e3.
// t2 = f32 < 0x3E800000
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp2);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0x3E800000);
shader_code_.push_back(0x3E800000);
shader_code_.push_back(0x3E800000);
shader_code_.push_back(0x3E800000);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Bias the exponent.
// f32 += 0xC2000000
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0xC2000000u);
shader_code_.push_back(0xC2000000u);
shader_code_.push_back(0xC2000000u);
shader_code_.push_back(0xC2000000u);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Replace the number in f32 with a denormalized one if needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp2);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Build the 7e3 numbers.
// t1 = f32 >> 16
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(16);
shader_code_.push_back(16);
shader_code_.push_back(16);
shader_code_.push_back(16);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// t1 = (f32 >> 16) & 1
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp1);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(1);
shader_code_.push_back(1);
shader_code_.push_back(1);
shader_code_.push_back(1);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// f10 = f32 + 0x7FFF
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0x7FFF);
shader_code_.push_back(0x7FFF);
shader_code_.push_back(0x7FFF);
shader_code_.push_back(0x7FFF);
++stat_.instruction_count;
++stat_.int_instruction_count;
// f10 = f32 + 0x7FFF + ((f32 >> 16) & 1)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(f10_temp1);
++stat_.instruction_count;
++stat_.int_instruction_count;
// f10 = (f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(16);
shader_code_.push_back(16);
shader_code_.push_back(16);
shader_code_.push_back(16);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// f10 = ((f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16) & 0x3FF
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0x3FF);
shader_code_.push_back(0x3FF);
shader_code_.push_back(0x3FF);
shader_code_.push_back(0x3FF);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Release f10_temp1 and f10_temp2.
PopSystemTemp(2);
// Convert alpha to fixed-point.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(source_and_scratch_temp);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
// 7e3 conversion done.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// ***************************************************************************
// 7e3 conversion ends here.
// ***************************************************************************
// Convert to 16-bit float.
uint32_t f16_temp = PushSystemTemp();