[D3D12] ROV 7e3 writing
This commit is contained in:
parent
c8a48f9c1c
commit
148a713280
|
@ -1754,7 +1754,306 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
|||
// Release fixed_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// TODO(Triang3l): Convert to 7e3.
|
||||
// ***************************************************************************
|
||||
// 7e3 conversion begins here.
|
||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||
// ***************************************************************************
|
||||
|
||||
// Check if the target format is 7e3 and the conversion is needed (this is
|
||||
// pretty long, better to branch here).
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||
shader_code_.push_back(flags_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
uint32_t f10_temp1 = PushSystemTemp(), f10_temp2 = PushSystemTemp();
|
||||
|
||||
// Assuming RGB is already clamped to [0.0, 31.875], and alpha is a float and
|
||||
// already clamped and multiplied by 3 to get [0.0, 3.0].
|
||||
|
||||
// Calculate the denormalized value if the numbers are too small to be
|
||||
// represented as normalized 7e3 into f10_temp1.
|
||||
|
||||
// t1 = f32 & 0x7FFFFF
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0x7FFFFF);
|
||||
shader_code_.push_back(0x7FFFFF);
|
||||
shader_code_.push_back(0x7FFFFF);
|
||||
shader_code_.push_back(0x7FFFFF);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// t1 = (f32 & 0x7FFFFF) | 0x800000
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0x800000);
|
||||
shader_code_.push_back(0x800000);
|
||||
shader_code_.push_back(0x800000);
|
||||
shader_code_.push_back(0x800000);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// t2 = f32 >> 23
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(23);
|
||||
shader_code_.push_back(23);
|
||||
shader_code_.push_back(23);
|
||||
shader_code_.push_back(23);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// t2 = 125 - (f32 >> 23)
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(125);
|
||||
shader_code_.push_back(125);
|
||||
shader_code_.push_back(125);
|
||||
shader_code_.push_back(125);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) |
|
||||
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// t1 = ((f32 & 0x7FFFFF) | 0x800000) >> (125 - (f32 >> 23))
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Check if the numbers are too small to be represented as a normalized 7e3.
|
||||
// t2 = f32 < 0x3E800000
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0x3E800000);
|
||||
shader_code_.push_back(0x3E800000);
|
||||
shader_code_.push_back(0x3E800000);
|
||||
shader_code_.push_back(0x3E800000);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Bias the exponent.
|
||||
// f32 += 0xC2000000
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0xC2000000u);
|
||||
shader_code_.push_back(0xC2000000u);
|
||||
shader_code_.push_back(0xC2000000u);
|
||||
shader_code_.push_back(0xC2000000u);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// Replace the number in f32 with a denormalized one if needed.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp2);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
// Build the 7e3 numbers.
|
||||
// t1 = f32 >> 16
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// t1 = (f32 >> 16) & 1
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(1);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// f10 = f32 + 0x7FFF
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0x7FFF);
|
||||
shader_code_.push_back(0x7FFF);
|
||||
shader_code_.push_back(0x7FFF);
|
||||
shader_code_.push_back(0x7FFF);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// f10 = f32 + 0x7FFF + ((f32 >> 16) & 1)
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(f10_temp1);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// f10 = (f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(16);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// f10 = ((f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16) & 0x3FF
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0x3FF);
|
||||
shader_code_.push_back(0x3FF);
|
||||
shader_code_.push_back(0x3FF);
|
||||
shader_code_.push_back(0x3FF);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Release f10_temp1 and f10_temp2.
|
||||
PopSystemTemp(2);
|
||||
|
||||
// Convert alpha to fixed-point.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
|
||||
// 7e3 conversion done.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
// ***************************************************************************
|
||||
// 7e3 conversion ends here.
|
||||
// ***************************************************************************
|
||||
|
||||
// Convert to 16-bit float.
|
||||
uint32_t f16_temp = PushSystemTemp();
|
||||
|
|
Loading…
Reference in New Issue