[D3D12] ROV: Disable RTs when using ROV
This commit is contained in:
parent
6d2e74325c
commit
67e5cb8681
|
@ -46,7 +46,8 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
||||||
// Zero out tessellation, stream output, blend state and formats for render
|
// Zero out tessellation, stream output, blend state and formats for render
|
||||||
// targets 4+, node mask, cached PSO, flags and other things.
|
// targets 4+, node mask, cached PSO, flags and other things.
|
||||||
std::memset(&update_desc_, 0, sizeof(update_desc_));
|
std::memset(&update_desc_, 0, sizeof(update_desc_));
|
||||||
update_desc_.BlendState.IndependentBlendEnable = TRUE;
|
update_desc_.BlendState.IndependentBlendEnable =
|
||||||
|
edram_rov_used_ ? FALSE : TRUE;
|
||||||
update_desc_.SampleMask = UINT_MAX;
|
update_desc_.SampleMask = UINT_MAX;
|
||||||
update_desc_.SampleDesc.Count = 1;
|
update_desc_.SampleDesc.Count = 1;
|
||||||
}
|
}
|
||||||
|
@ -353,6 +354,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateBlendStateAndRenderTargets(
|
PipelineCache::UpdateStatus PipelineCache::UpdateBlendStateAndRenderTargets(
|
||||||
D3D12Shader* pixel_shader,
|
D3D12Shader* pixel_shader,
|
||||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||||
|
if (edram_rov_used_) {
|
||||||
|
return current_pipeline_ == nullptr ? UpdateStatus::kMismatch
|
||||||
|
: UpdateStatus::kCompatible;
|
||||||
|
}
|
||||||
|
|
||||||
auto& regs = update_blend_state_and_render_targets_regs_;
|
auto& regs = update_blend_state_and_render_targets_regs_;
|
||||||
|
|
||||||
bool dirty = current_pipeline_ == nullptr;
|
bool dirty = current_pipeline_ == nullptr;
|
||||||
|
@ -624,6 +630,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
|
||||||
|
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState(
|
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState(
|
||||||
DXGI_FORMAT format) {
|
DXGI_FORMAT format) {
|
||||||
|
if (edram_rov_used_) {
|
||||||
|
return current_pipeline_ == nullptr ? UpdateStatus::kMismatch
|
||||||
|
: UpdateStatus::kCompatible;
|
||||||
|
}
|
||||||
|
|
||||||
auto& regs = update_depth_stencil_state_regs_;
|
auto& regs = update_depth_stencil_state_regs_;
|
||||||
|
|
||||||
bool dirty = current_pipeline_ == nullptr;
|
bool dirty = current_pipeline_ == nullptr;
|
||||||
|
|
|
@ -352,6 +352,10 @@ void RenderTargetCache::BeginFrame() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
||||||
|
if (IsROVUsedForEDRAM()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// There are two kinds of render target binding updates in this implementation
|
// There are two kinds of render target binding updates in this implementation
|
||||||
// in case something has been changed - full and partial.
|
// in case something has been changed - full and partial.
|
||||||
//
|
//
|
||||||
|
|
|
@ -62,8 +62,8 @@ using namespace ucode;
|
||||||
// second buffer in the descriptor array at b2, which is assigned to CB1, the
|
// second buffer in the descriptor array at b2, which is assigned to CB1, the
|
||||||
// index would be CB1[3][0].
|
// index would be CB1[3][0].
|
||||||
|
|
||||||
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rovs_used)
|
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used)
|
||||||
: edram_rovs_used_(edram_rovs_used) {
|
: edram_rov_used_(edram_rov_used) {
|
||||||
// Don't allocate again and again for the first shader.
|
// Don't allocate again and again for the first shader.
|
||||||
shader_code_.reserve(8192);
|
shader_code_.reserve(8192);
|
||||||
shader_object_.reserve(16384);
|
shader_object_.reserve(16384);
|
||||||
|
@ -892,6 +892,75 @@ void DxbcShaderTranslator::CompleteVertexShader() {
|
||||||
++stat_.mov_instruction_count;
|
++stat_.mov_instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||||
|
// Remap guest render target indices to host because on the host, the
|
||||||
|
// indices of the bound render targets are consecutive. This is done using 16
|
||||||
|
// movc instructions because indexable temps are known to be causing
|
||||||
|
// performance issues on some Nvidia GPUs. In the map, the components are host
|
||||||
|
// render target indices, and the values are the guest ones.
|
||||||
|
uint32_t remap_movc_mask_temp = PushSystemTemp();
|
||||||
|
uint32_t remap_movc_target_temp = PushSystemTemp();
|
||||||
|
system_constants_used_ |= 1u << kSysConst_ColorOutputMap_Index;
|
||||||
|
// Host RT i, guest RT j.
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
// mask = map.iiii == (0, 1, 2, 3)
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(remap_movc_mask_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_ColorOutputMap_Vec);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
shader_code_.push_back(2);
|
||||||
|
shader_code_.push_back(3);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.int_instruction_count;
|
||||||
|
for (uint32_t j = 0; j < 4; ++j) {
|
||||||
|
// If map.i == j, move guest color j to the temporary host color.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(remap_movc_target_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, j, 1));
|
||||||
|
shader_code_.push_back(remap_movc_mask_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temp_color_[j]);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(remap_movc_target_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
}
|
||||||
|
// Write the remapped color to host render target i.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
||||||
|
shader_code_.push_back(i);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(remap_movc_target_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
}
|
||||||
|
// Free the temporary registers used for remapping.
|
||||||
|
PopSystemTemp(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
|
// TODO(Triang3l): Write the output to the EDRAM rasterizer-ordered view.
|
||||||
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader() {
|
void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
// Alpha test.
|
// Alpha test.
|
||||||
// Check if alpha test is enabled (if the constant is not 0).
|
// Check if alpha test is enabled (if the constant is not 0).
|
||||||
|
@ -1126,68 +1195,12 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
// Release gamma_toggle_temp and gamma_pieces_temp.
|
// Release gamma_toggle_temp and gamma_pieces_temp.
|
||||||
PopSystemTemp(2);
|
PopSystemTemp(2);
|
||||||
|
|
||||||
// Remap guest render target indices to host because on the host, the
|
// Write the values to the render targets.
|
||||||
// indices of the bound render targets are consecutive. This is done using 16
|
if (edram_rov_used_) {
|
||||||
// movc instructions because indexable temps are known to be causing
|
CompletePixelShader_WriteToROV();
|
||||||
// performance issues on some Nvidia GPUs. In the map, the components are host
|
} else {
|
||||||
// render target indices, and the values are the guest ones.
|
CompletePixelShader_WriteToRTVs();
|
||||||
uint32_t remap_movc_mask_temp = PushSystemTemp();
|
|
||||||
uint32_t remap_movc_target_temp = PushSystemTemp();
|
|
||||||
system_constants_used_ |= 1u << kSysConst_ColorOutputMap_Index;
|
|
||||||
// Host RT i, guest RT j.
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
|
||||||
// mask = map.iiii == (0, 1, 2, 3)
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
|
||||||
shader_code_.push_back(remap_movc_mask_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_ColorOutputMap_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
shader_code_.push_back(1);
|
|
||||||
shader_code_.push_back(2);
|
|
||||||
shader_code_.push_back(3);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.int_instruction_count;
|
|
||||||
for (uint32_t j = 0; j < 4; ++j) {
|
|
||||||
// If map.i == j, move guest color j to the temporary host color.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
|
||||||
shader_code_.push_back(remap_movc_target_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, j, 1));
|
|
||||||
shader_code_.push_back(remap_movc_mask_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_color_[j]);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(remap_movc_target_temp);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.movc_instruction_count;
|
|
||||||
}
|
|
||||||
// Write the remapped color to host render target i.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
|
||||||
shader_code_.push_back(i);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(remap_movc_target_temp);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.mov_instruction_count;
|
|
||||||
}
|
}
|
||||||
// Free the temporary registers used for remapping.
|
|
||||||
PopSystemTemp(2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompleteShaderCode() {
|
void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
|
@ -8389,16 +8402,19 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
|
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
|
||||||
++stat_.dcl_count;
|
++stat_.dcl_count;
|
||||||
// Color output.
|
// Color output.
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
if (!edram_rov_used_) {
|
||||||
shader_object_.push_back(
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
shader_object_.push_back(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
||||||
shader_object_.push_back(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
shader_object_.push_back(
|
||||||
shader_object_.push_back(i);
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
||||||
++stat_.dcl_count;
|
shader_object_.push_back(i);
|
||||||
|
++stat_.dcl_count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Depth output.
|
// Depth output.
|
||||||
|
// TODO(Triang3l): Do something with this for ROV.
|
||||||
if (writes_depth_) {
|
if (writes_depth_) {
|
||||||
shader_object_.push_back(
|
shader_object_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
||||||
|
|
|
@ -23,7 +23,7 @@ namespace gpu {
|
||||||
// Generates shader model 5_1 byte code (for Direct3D 12).
|
// Generates shader model 5_1 byte code (for Direct3D 12).
|
||||||
class DxbcShaderTranslator : public ShaderTranslator {
|
class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
public:
|
public:
|
||||||
DxbcShaderTranslator(bool edram_rovs_used);
|
DxbcShaderTranslator(bool edram_rov_used);
|
||||||
~DxbcShaderTranslator() override;
|
~DxbcShaderTranslator() override;
|
||||||
|
|
||||||
// Constant buffer bindings in space 0.
|
// Constant buffer bindings in space 0.
|
||||||
|
@ -305,6 +305,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
|
|
||||||
// Writing the epilogue.
|
// Writing the epilogue.
|
||||||
void CompleteVertexShader();
|
void CompleteVertexShader();
|
||||||
|
void CompletePixelShader_WriteToRTVs();
|
||||||
|
void CompletePixelShader_WriteToROV();
|
||||||
void CompletePixelShader();
|
void CompletePixelShader();
|
||||||
void CompleteShaderCode();
|
void CompleteShaderCode();
|
||||||
|
|
||||||
|
@ -432,7 +434,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
std::vector<uint32_t> shader_object_;
|
std::vector<uint32_t> shader_object_;
|
||||||
|
|
||||||
// Whether the output merger should be emulated in pixel shaders.
|
// Whether the output merger should be emulated in pixel shaders.
|
||||||
bool edram_rovs_used_;
|
bool edram_rov_used_;
|
||||||
|
|
||||||
// Data types used in constants buffers. Listed in dependency order.
|
// Data types used in constants buffers. Listed in dependency order.
|
||||||
enum class RdefTypeIndex {
|
enum class RdefTypeIndex {
|
||||||
|
|
Loading…
Reference in New Issue