[D3D12] ROV: 64bpp load/store
This commit is contained in:
parent
eb8596d72a
commit
5395183d98
|
@ -1411,7 +1411,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
|||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Load the low 32 bits.
|
||||
// Allocate temporary registers for unpacking pixels.
|
||||
uint32_t pack_width_temp = PushSystemTemp();
|
||||
uint32_t pack_offset_temp = PushSystemTemp();
|
||||
|
||||
// Load the lower 32 bits.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
|
@ -1428,12 +1432,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
|||
++stat_.instruction_count;
|
||||
++stat_.texture_load_instructions;
|
||||
|
||||
// Unpack the low 32 bits, as signed because of k_16_16 and k_16_16_16_16
|
||||
// Unpack the lower 32 bits, as signed because of k_16_16 and k_16_16_16_16
|
||||
// (will be masked later if needed).
|
||||
uint32_t pack_width_low_temp = PushSystemTemp();
|
||||
uint32_t pack_offset_low_temp = PushSystemTemp();
|
||||
CompletePixelShader_WriteToROV_ExtractPackLayout(
|
||||
rt_index, false, pack_width_low_temp, pack_offset_low_temp);
|
||||
rt_index, false, pack_width_temp, pack_offset_temp);
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
|
@ -1441,16 +1443,97 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
|||
shader_code_.push_back(target_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_width_low_temp);
|
||||
shader_code_.push_back(pack_width_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_offset_low_temp);
|
||||
shader_code_.push_back(pack_offset_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
|
||||
shader_code_.push_back(target_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
// Release pack_width_low_temp and pack_offset_low_temp.
|
||||
|
||||
// Check if need to load the upper 32 bits.
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMRTPackWidthHigh_Vec);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Allocate a register for the components from the upper 32 bits (will be
|
||||
// combined with the lower using OR).
|
||||
uint32_t high_temp = PushSystemTemp();
|
||||
|
||||
// Load the upper 32 bits.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(high_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
|
||||
shader_code_.push_back(edram_dword_offset_high_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 2));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
++stat_.instruction_count;
|
||||
++stat_.texture_load_instructions;
|
||||
|
||||
// Unpack the higher 32 bits.
|
||||
CompletePixelShader_WriteToROV_ExtractPackLayout(
|
||||
rt_index, true, pack_width_temp, pack_offset_temp);
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(high_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_width_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_offset_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
|
||||
shader_code_.push_back(high_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// Combine the components from the lower and the upper 32 bits. In ibfe, if
|
||||
// width is 0, the result is 0 (not 0xFFFFFFFF), so it's fine to do this
|
||||
// without pre-masking.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(target_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(target_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(high_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Release high_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// Upper 32 bits loaded.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
// Release pack_width_temp and pack_offset_temp.
|
||||
PopSystemTemp(2);
|
||||
|
||||
// Mask the components to differentiate between signed and unsigned.
|
||||
|
@ -1472,8 +1555,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
|||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// TODO(Triang3l): 64bpp loading and unpacking.
|
||||
|
||||
// Convert from fixed-point.
|
||||
uint32_t fixed_temp = PushSystemTemp();
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) |
|
||||
|
@ -2587,50 +2668,70 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
|||
// Release f16_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// Pack and store the low 32 bits.
|
||||
uint32_t pack_temp = PushSystemTemp(true);
|
||||
// Pack and store the lower and the upper 32 bits.
|
||||
uint32_t pack_temp = PushSystemTemp();
|
||||
uint32_t pack_width_temp = PushSystemTemp();
|
||||
uint32_t pack_offset_temp = PushSystemTemp();
|
||||
|
||||
// 1) Insert color components into different vector components.
|
||||
uint32_t pack_width_low_temp = PushSystemTemp();
|
||||
uint32_t pack_offset_low_temp = PushSystemTemp();
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
if (i != 0) {
|
||||
// Check if need to store the upper 32 bits.
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMRTPackWidthHigh_Vec);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
}
|
||||
|
||||
// Insert color components into different vector components.
|
||||
CompletePixelShader_WriteToROV_ExtractPackLayout(
|
||||
rt_index, false, pack_width_low_temp, pack_offset_low_temp);
|
||||
rt_index, i != 0, pack_width_temp, pack_offset_temp);
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_width_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_offset_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_width_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_offset_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
// Release pack_width_low_temp and pack_offset_low_temp.
|
||||
PopSystemTemp(2);
|
||||
// 2) Merge XY and ZW.
|
||||
|
||||
// Merge XY and ZW.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
// 3) Merge X and Y.
|
||||
|
||||
// Merge X and Y.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
|
@ -2644,7 +2745,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
|||
shader_code_.push_back(pack_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
// 4) Write the low 32 bits.
|
||||
|
||||
// Write the dword.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_STORE_UAV_TYPED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
|
@ -2654,20 +2756,25 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
|||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
|
||||
shader_code_.push_back(edram_dword_offset_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(i ? edram_dword_offset_high_temp
|
||||
: edram_dword_offset_low_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(pack_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.c_texture_store_instructions;
|
||||
|
||||
// Release pack_temp.
|
||||
PopSystemTemp();
|
||||
if (i != 0) {
|
||||
// Upper 32 bits stored.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(Triang3l): 64bpp packing and storing.
|
||||
|
||||
// Release flags_temp.
|
||||
PopSystemTemp();
|
||||
// Release pack_temp, pack_width_temp, pack_offset_temp and flags_temp.
|
||||
PopSystemTemp(4);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||
|
|
Loading…
Reference in New Issue