[D3D12] ROV: 64bpp load/store

This commit is contained in:
Triang3l 2018-10-14 14:38:04 +03:00
parent eb8596d72a
commit 5395183d98
1 changed file with 194 additions and 87 deletions

View File

@ -1411,7 +1411,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Load the low 32 bits. // Allocate temporary registers for unpacking pixels.
uint32_t pack_width_temp = PushSystemTemp();
uint32_t pack_offset_temp = PushSystemTemp();
// Load the lower 32 bits.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
@ -1428,12 +1432,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.texture_load_instructions; ++stat_.texture_load_instructions;
// Unpack the low 32 bits, as signed because of k_16_16 and k_16_16_16_16 // Unpack the lower 32 bits, as signed because of k_16_16 and k_16_16_16_16
// (will be masked later if needed). // (will be masked later if needed).
uint32_t pack_width_low_temp = PushSystemTemp();
uint32_t pack_offset_low_temp = PushSystemTemp();
CompletePixelShader_WriteToROV_ExtractPackLayout( CompletePixelShader_WriteToROV_ExtractPackLayout(
rt_index, false, pack_width_low_temp, pack_offset_low_temp); rt_index, false, pack_width_temp, pack_offset_temp);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
@ -1441,16 +1443,97 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
shader_code_.push_back(target_temp); shader_code_.push_back(target_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_width_low_temp); shader_code_.push_back(pack_width_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_offset_low_temp); shader_code_.push_back(pack_offset_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
shader_code_.push_back(target_temp); shader_code_.push_back(target_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.int_instruction_count; ++stat_.int_instruction_count;
// Release pack_width_low_temp and pack_offset_low_temp.
// Check whether the upper 32 bits need to be loaded.
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTPackWidthHigh_Vec);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Allocate a register for the components from the upper 32 bits (will be
// combined with the lower using OR).
uint32_t high_temp = PushSystemTemp();
// Load the upper 32 bits.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(high_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
shader_code_.push_back(edram_dword_offset_high_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 2));
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.texture_load_instructions;
// Unpack the upper 32 bits.
CompletePixelShader_WriteToROV_ExtractPackLayout(
rt_index, true, pack_width_temp, pack_offset_temp);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(high_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_width_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_offset_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
shader_code_.push_back(high_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Combine the components from the lower and the upper 32 bits. In ibfe, if
// width is 0, the result is 0 (not 0xFFFFFFFF), so it's fine to do this
// without pre-masking.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(high_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Release high_temp.
PopSystemTemp();
// Upper 32 bits loaded.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Release pack_width_temp and pack_offset_temp.
PopSystemTemp(2); PopSystemTemp(2);
// Mask the components to differentiate between signed and unsigned. // Mask the components to differentiate between signed and unsigned.
@ -1472,8 +1555,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// TODO(Triang3l): 64bpp loading and unpacking.
// Convert from fixed-point. // Convert from fixed-point.
uint32_t fixed_temp = PushSystemTemp(); uint32_t fixed_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) |
@ -2587,50 +2668,70 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
// Release f16_temp. // Release f16_temp.
PopSystemTemp(); PopSystemTemp();
// Pack and store the low 32 bits. // Pack and store the lower and the upper 32 bits.
uint32_t pack_temp = PushSystemTemp(true); uint32_t pack_temp = PushSystemTemp();
uint32_t pack_width_temp = PushSystemTemp();
uint32_t pack_offset_temp = PushSystemTemp();
// 1) Insert color components into different vector components. for (uint32_t i = 0; i < 2; ++i) {
uint32_t pack_width_low_temp = PushSystemTemp(); if (i != 0) {
uint32_t pack_offset_low_temp = PushSystemTemp(); // Check if need to store the upper 32 bits.
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTPackWidthHigh_Vec);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
// Insert color components into different vector components.
CompletePixelShader_WriteToROV_ExtractPackLayout( CompletePixelShader_WriteToROV_ExtractPackLayout(
rt_index, false, pack_width_low_temp, pack_offset_low_temp); rt_index, i != 0, pack_width_temp, pack_offset_temp);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_width_low_temp); shader_code_.push_back(pack_width_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_offset_low_temp); shader_code_.push_back(pack_offset_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(source_and_scratch_temp); shader_code_.push_back(source_and_scratch_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(pack_temp); shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Release pack_width_low_temp and pack_offset_low_temp.
PopSystemTemp(2); // Merge XY and ZW.
// 2) Merge XY and ZW.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1));
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// 3) Merge X and Y.
// Merge X and Y.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
@ -2644,7 +2745,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// 4) Write the low 32 bits.
// Write the dword.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_STORE_UAV_TYPED) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_STORE_UAV_TYPED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
@ -2654,20 +2756,25 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
shader_code_.push_back(0); shader_code_.push_back(0);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
shader_code_.push_back(edram_dword_offset_low_temp); shader_code_.push_back(i ? edram_dword_offset_high_temp
shader_code_.push_back( : edram_dword_offset_low_temp);
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_temp); shader_code_.push_back(pack_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.c_texture_store_instructions; ++stat_.c_texture_store_instructions;
// Release pack_temp. if (i != 0) {
PopSystemTemp(); // Upper 32 bits stored.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
// TODO(Triang3l): 64bpp packing and storing. // Release pack_temp, pack_width_temp, pack_offset_temp and flags_temp.
PopSystemTemp(4);
// Release flags_temp.
PopSystemTemp();
} }
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {