[D3D12] ROV: Empty pixel shader for depth-only drawing

This commit is contained in:
Triang3l 2018-10-16 14:02:43 +03:00
parent 6e9964b43e
commit 2d56c9ae30
4 changed files with 560 additions and 440 deletions

View File

@ -15,6 +15,7 @@
#include <cinttypes>
#include <cmath>
#include <cstring>
#include <utility>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
@ -42,6 +43,11 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
edram_rov_used_(edram_rov_used) {
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
if (edram_rov_used_) {
depth_only_pixel_shader_ =
std::move(shader_translator_->CreateDepthOnlyPixelShader());
}
// Set pipeline state description values we never change.
// Zero out tessellation, stream output, blend state and formats for render
// targets 4+, node mask, cached PSO, flags and other things.
@ -324,10 +330,15 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
if (pixel_shader != nullptr) {
update_desc_.PS.pShaderBytecode = pixel_shader->translated_binary().data();
update_desc_.PS.BytecodeLength = pixel_shader->translated_binary().size();
} else {
if (edram_rov_used_) {
update_desc_.PS.pShaderBytecode = depth_only_pixel_shader_.data();
update_desc_.PS.BytecodeLength = depth_only_pixel_shader_.size();
} else {
update_desc_.PS.pShaderBytecode = nullptr;
update_desc_.PS.BytecodeLength = 0;
}
}
switch (primitive_type) {
case PrimitiveType::kPointList:
update_desc_.GS.pShaderBytecode = primitive_point_list_gs;

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
#include <unordered_map>
#include <vector>
#include "third_party/xxhash/xxhash.h"
@ -93,6 +94,10 @@ class PipelineCache {
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
// Empty depth-only pixel shader for writing to depth buffer via ROV when no
// Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_;
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced

View File

@ -450,11 +450,20 @@ bool DxbcShaderTranslator::GetBlendConstants(uint32_t blend_control,
return (blend_control & 0x1FFF1FFF) != 0x00010001;
}
// Generates the special pixel shader that has no guest code to translate and
// only goes through the translator's prologue and epilogue in depth-only mode.
// Returns the bytes produced by CompleteTranslation(). The translator state is
// reset as a side effect, so any translation in progress is discarded.
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
  // Reset() clears is_depth_only_pixel_shader_, so the flag has to be raised
  // only after the reset, and before the prologue is written.
  Reset();
  is_depth_only_pixel_shader_ = true;
  StartTranslation();
  // Return the call result directly. Wrapping a function call result in
  // std::move within a return statement is redundant and, when the callee
  // returns by value (presumably the case here - confirm against the
  // CompleteTranslation declaration), prevents copy elision.
  return CompleteTranslation();
}
void DxbcShaderTranslator::Reset() {
ShaderTranslator::Reset();
shader_code_.clear();
is_depth_only_pixel_shader_ = false;
cbuffer_count_ = 0;
// System constants always used in prologues/epilogues.
cbuffer_index_system_constants_ = cbuffer_count_++;
@ -484,8 +493,11 @@ void DxbcShaderTranslator::Reset() {
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
uint32_t register_index = system_temp_count_current_;
if (!IndexableGPRsUsed()) {
// Guest shader registers first if they're not in x0.
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
// Guest shader registers first if they're not in x0. Depth-only pixel
// shader is a special case of the DXBC translator usage, where there are no
// GPRs because there's no shader to translate, and a guest shader is not
// loaded.
register_index += register_count();
}
++system_temp_count_current_;
@ -842,6 +854,11 @@ void DxbcShaderTranslator::StartPixelShader() {
++stat_.mov_instruction_count;
}
// If not translating anything, we only need the depth.
if (is_depth_only_pixel_shader_) {
return;
}
// Copy interpolants to GPRs.
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
if (IndexableGPRsUsed()) {
@ -1039,17 +1056,20 @@ void DxbcShaderTranslator::StartPixelShader() {
void DxbcShaderTranslator::StartTranslation() {
// Allocate global system temporary registers that may also be used in the
// epilogue.
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
system_temp_position_ = PushSystemTemp(true);
} else if (is_pixel_shader()) {
} else if (IsDXBCPixelShader()) {
if (!is_depth_only_pixel_shader_) {
for (uint32_t i = 0; i < 4; ++i) {
system_temp_color_[i] = PushSystemTemp(true);
}
}
if (edram_rov_used_) {
system_temp_depth_ = PushSystemTemp();
}
}
if (!is_depth_only_pixel_shader_) {
// Allocate system temporary variables for the translated code.
system_temp_pv_ = PushSystemTemp(true);
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
@ -1057,14 +1077,20 @@ void DxbcShaderTranslator::StartTranslation() {
system_temp_loop_count_ = PushSystemTemp(true);
system_temp_grad_h_lod_ = PushSystemTemp(true);
system_temp_grad_v_ = PushSystemTemp(true);
}
// Write stage-specific prologue.
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
StartVertexShader();
} else if (is_pixel_shader()) {
} else if (IsDXBCPixelShader()) {
StartPixelShader();
}
// If not translating anything, don't start the main loop.
if (is_depth_only_pixel_shader_) {
return;
}
// Start the main loop (for jumping to labels by setting pc and continuing).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LOOP) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
@ -3286,6 +3312,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Calculate the address in the EDRAM buffer.
if (!is_depth_only_pixel_shader_) {
// 1a) Get dword offset within the tile to edram_coord_low_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
@ -3303,6 +3330,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(edram_coord_low_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
// 1b) Do the same for depth/stencil to system_temp_depth_.w.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
@ -3322,6 +3350,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.uint_instruction_count;
if (!is_depth_only_pixel_shader_) {
// 2a) Combine the tile offset and the offset within the tile to
// edram_coord_low_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
@ -3340,6 +3369,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(edram_coord_low_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
// 2b) Do the same for depth/stencil to system_temp_depth_.w.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
@ -3359,9 +3389,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Adjust the offsets for 64 bits per pixel.
// Adjust the offsets for 64 bits per pixel, and add EDRAM bases of color
// render targets.
uint32_t edram_coord_high_temp = PushSystemTemp();
uint32_t edram_coord_high_temp = 0;
if (!is_depth_only_pixel_shader_) {
edram_coord_high_temp = PushSystemTemp();
// Get which render targets are 64bpp, as log2 of dword count per pixel.
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
@ -3391,11 +3425,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.movc_instruction_count;
// Multiply the offsets by 1 or 2 depending on the number of bits per pixel.
// It's okay to do this here because everything in the equation (at least for
// Xenia's representation of the EDRAM - may not be true on the real console)
// needs to be multiplied by 2 - Y tile index (the same as multipying the
// pitch by 2), X tile index (it addresses pairs of tiles in this case), and
// the offset within a pair of tiles.
// It's okay to do this here because everything in the equation (at least
// for Xenia's representation of the EDRAM - may not be true on the real
// console) needs to be multiplied by 2 - Y tile index (the same as
// multiplying the pitch by 2), X tile index (it addresses pairs of tiles in
// this case), and the offset within a pair of tiles.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
@ -3404,8 +3438,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_high_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
@ -3417,8 +3451,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
@ -3428,6 +3462,22 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.int_instruction_count;
// Get the offsets of the upper 32 bits.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(edram_coord_high_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_high_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
}
// Add the EDRAM base for depth.
system_constants_used_ |= 1ull << kSysConst_EDRAMDepthBaseDwords_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
@ -3447,21 +3497,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.int_instruction_count;
// Get the offsets of the upper 32 bits.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(edram_coord_high_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_high_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// ***************************************************************************
// Do depth/stencil testing. This must be done before the color writing, so
// discard happens before the write, and also because in case the EDRAM base
@ -3742,6 +3777,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Write to color render targets.
// ***************************************************************************
if (!is_depth_only_pixel_shader_) {
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
// Get what render targets need to be written to.
@ -3836,8 +3872,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
uint32_t dest_color_temp = PushSystemTemp();
CompletePixelShader_WriteToROV_LoadColor(
edram_coord_low_temp, edram_coord_high_temp, rt_index, dest_color_temp);
CompletePixelShader_WriteToROV_LoadColor(edram_coord_low_temp,
edram_coord_high_temp, rt_index,
dest_color_temp);
// Blend if needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
@ -3849,9 +3886,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(rt_blend_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
CompletePixelShader_WriteToROV_Blend(rt_index, system_temp_color_[rt_index],
dest_color_temp);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
CompletePixelShader_WriteToROV_Blend(
rt_index, system_temp_color_[rt_index], dest_color_temp);
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
@ -3859,7 +3897,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Release dest_color_temp.
PopSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
@ -3871,17 +3910,30 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
system_temp_color_[rt_index]);
// Close the check whether the RT is used.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
// Release edram_coord_low_temp, edram_coord_high_temp, rt_used_temp,
// rt_load_temp and rt_blend_temp.
PopSystemTemp(5);
// Release rt_used_temp, rt_load_temp and rt_blend_temp.
PopSystemTemp(3);
}
// Release edram_coord_low_temp and, if used, edram_coord_high_temp.
PopSystemTemp(is_depth_only_pixel_shader_ ? 1 : 2);
}
void DxbcShaderTranslator::CompletePixelShader() {
if (is_depth_only_pixel_shader_) {
// The depth-only shader only needs to do the depth test and to write the
// depth to the ROV.
if (edram_rov_used_) {
CompletePixelShader_WriteToROV();
}
return;
}
// Alpha test.
// Check if alpha test is enabled (if the constant is not 0).
system_constants_used_ |= (1ull << kSysConst_AlphaTest_Index) |
@ -4125,9 +4177,11 @@ void DxbcShaderTranslator::CompletePixelShader() {
}
void DxbcShaderTranslator::CompleteShaderCode() {
if (!is_depth_only_pixel_shader_) {
// Close the last label and the switch.
if (FLAGS_dxbc_switch) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
shader_code_.push_back(
@ -4135,7 +4189,8 @@ void DxbcShaderTranslator::CompleteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
} else {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
@ -4143,7 +4198,8 @@ void DxbcShaderTranslator::CompleteShaderCode() {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDLOOP) |
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDLOOP) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
@ -4155,25 +4211,28 @@ void DxbcShaderTranslator::CompleteShaderCode() {
// - system_temp_grad_h_lod_.
// - system_temp_grad_v_.
PopSystemTemp(6);
}
// Write stage-specific epilogue.
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
CompleteVertexShader();
} else if (is_pixel_shader()) {
} else if (IsDXBCPixelShader()) {
CompletePixelShader();
}
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
// Release system_temp_position_.
PopSystemTemp();
} else if (is_pixel_shader()) {
} else if (IsDXBCPixelShader()) {
if (edram_rov_used_) {
// Release system_temp_depth_.
PopSystemTemp();
}
if (!is_depth_only_pixel_shader_) {
// Release system_temp_color_.
PopSystemTemp(4);
}
}
// Return from `main`.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RET) |
@ -6802,7 +6861,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
uint32_t tfetch_pair_offset = (tfetch_index >> 1) * 3;
// TODO(Triang3l): kGetTextureBorderColorFrac.
if (!is_pixel_shader() &&
if (!IsDXBCPixelShader() &&
(instr.opcode == FetchOpcode::kGetTextureComputedLod ||
instr.opcode == FetchOpcode::kGetTextureGradients)) {
// Quickly skip everything if tried to get anything involving derivatives
@ -7403,7 +7462,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
// The non-pixel-shader case should be handled before because it
// just returns a constant in this case.
assert_true(is_pixel_shader());
assert_true(IsDXBCPixelShader());
replicate_result = true;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
@ -7508,7 +7567,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Both sample_l and sample_b should add the LOD bias as the last
// operand in our case.
bool explicit_lod =
!instr.attributes.use_computed_lod || !is_pixel_shader();
!instr.attributes.use_computed_lod || !IsDXBCPixelShader();
if (explicit_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
@ -7887,7 +7946,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Release coord_temp.
PopSystemTemp();
} else if (instr.opcode == FetchOpcode::kGetTextureGradients) {
assert_true(is_pixel_shader());
assert_true(IsDXBCPixelShader());
store_result = true;
// pv.xz = ddx(coord.xy)
shader_code_.push_back(
@ -10477,22 +10536,25 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Constant buffer offset (set later).
shader_object_.push_back(0);
// Bound resource count (samplers, SRV, UAV, CBV).
uint32_t resource_count = cbuffer_count_;
if (!is_depth_only_pixel_shader_) {
// + 1 for shared memory (vfetches can probably appear in pixel shaders too,
// they are handled safely there anyway).
uint32_t resource_count = uint32_t(sampler_bindings_.size()) + 1 +
uint32_t(texture_srvs_.size()) + cbuffer_count_;
if (is_pixel_shader() && edram_rov_used_) {
resource_count +=
uint32_t(sampler_bindings_.size()) + 1 + uint32_t(texture_srvs_.size());
}
if (IsDXBCPixelShader() && edram_rov_used_) {
// EDRAM.
++resource_count;
}
shader_object_.push_back(resource_count);
// Bound resource buffer offset (set later).
shader_object_.push_back(0);
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
// vs_5_1
shader_object_.push_back(0xFFFE0501u);
} else {
assert_true(is_pixel_shader());
assert_true(IsDXBCPixelShader());
// ps_5_1
shader_object_.push_back(0xFFFF0501u);
}
@ -10787,25 +10849,31 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// their names already.
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
uint32_t sampler_name_offset = new_offset;
uint32_t sampler_name_offset = 0;
uint32_t shared_memory_name_offset = 0;
uint32_t texture_name_offset = 0;
if (!is_depth_only_pixel_shader_) {
sampler_name_offset = new_offset;
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
new_offset +=
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
}
uint32_t shared_memory_name_offset = new_offset;
shared_memory_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_shared_memory");
uint32_t texture_name_offset = new_offset;
texture_name_offset = new_offset;
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
}
}
uint32_t edram_name_offset = new_offset;
if (is_pixel_shader() && edram_rov_used_) {
if (IsDXBCPixelShader() && edram_rov_used_) {
new_offset += AppendString(shader_object_, "xe_edram");
}
// Write the offset to the header.
shader_object_[chunk_position_dwords + 3] = new_offset;
if (!is_depth_only_pixel_shader_) {
// Samplers.
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
const SamplerBinding& sampler_binding = sampler_bindings_[i];
@ -10886,8 +10954,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(1 + i);
texture_name_offset += GetStringLength(texture_srv.name.c_str());
}
}
if (is_pixel_shader() && edram_rov_used_) {
if (IsDXBCPixelShader() && edram_rov_used_) {
// EDRAM uint32 buffer.
shader_object_.push_back(edram_name_offset);
// D3D_SIT_UAV_RWTYPED.
@ -10954,7 +11023,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
const uint32_t signature_position_dwords = 2;
const uint32_t signature_size_dwords = 6;
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
// Only unswapped vertex index.
shader_object_.push_back(1);
// Unknown.
@ -10977,7 +11046,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
// Vertex index semantic name.
AppendString(shader_object_, "SV_VertexID");
} else {
assert_true(is_pixel_shader());
assert_true(IsDXBCPixelShader());
// Interpolators, point parameters (coordinates, size), screen position,
// is front face.
shader_object_.push_back(kInterpolatorCount + 3);
@ -10996,7 +11065,10 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(kPSInInterpolatorRegister + i);
// Interpolators are copied to GPRs in the beginning of the shader. If
// there's a register to copy to, this interpolator is used.
shader_object_.push_back(0xF | (i < register_count() ? (0xF << 8) : 0));
uint32_t interpolator_used =
(!is_depth_only_pixel_shader_ && i < register_count()) ? (0xF << 8)
: 0;
shader_object_.push_back(0xF | interpolator_used);
}
// Point parameters - coordinate on the point and point size as a float3
@ -11007,7 +11079,8 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(0);
shader_object_.push_back(3);
shader_object_.push_back(kPSInPointParametersRegister);
shader_object_.push_back(0x7 | (0x3 << 8));
shader_object_.push_back(0x7 |
(is_depth_only_pixel_shader_ ? 0 : (0x3 << 8)));
// Position (only XY needed for ps_param_gen, but XYZ needed for ROV).
// Always used because ps_param_gen is handled dynamically and because this
@ -11027,7 +11100,8 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(9);
shader_object_.push_back(1);
shader_object_.push_back(kPSInFrontFaceRegister);
shader_object_.push_back(0x1 | (0x1 << 8));
shader_object_.push_back(0x1 |
(is_depth_only_pixel_shader_ ? 0 : (0x1 << 8)));
// Write the semantic names.
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
@ -11060,7 +11134,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
const uint32_t signature_position_dwords = 2;
const uint32_t signature_size_dwords = 6;
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
// Interpolators, point parameters (coordinates, size), screen position.
shader_object_.push_back(kInterpolatorCount + 2);
// Unknown.
@ -11116,7 +11190,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
shader_object_[position_name_position_dwords] = new_offset;
new_offset += AppendString(shader_object_, "SV_Position");
} else {
assert_true(is_pixel_shader());
assert_true(IsDXBCPixelShader());
if (edram_rov_used_) {
// No outputs - only ROV read/write.
shader_object_.push_back(0);
@ -11124,11 +11198,13 @@ void DxbcShaderTranslator::WriteOutputSignature() {
shader_object_.push_back(8);
} else {
// Color render targets, optionally depth.
shader_object_.push_back(4 + (writes_depth_ ? 1 : 0));
shader_object_.push_back((is_depth_only_pixel_shader_ ? 0 : 4) +
(writes_depth_ ? 1 : 0));
// Unknown.
shader_object_.push_back(8);
// Color render targets.
if (!is_depth_only_pixel_shader_) {
for (uint32_t i = 0; i < 4; ++i) {
// Reserve space for the semantic name (SV_Target).
shader_object_.push_back(0);
@ -11142,6 +11218,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
// to make the indices consecutive.
shader_object_.push_back(0xF);
}
}
// Depth.
if (writes_depth_) {
@ -11157,12 +11234,14 @@ void DxbcShaderTranslator::WriteOutputSignature() {
// Write the semantic names.
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
if (!is_depth_only_pixel_shader_) {
for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_name_position_dwords = chunk_position_dwords +
signature_position_dwords +
i * signature_size_dwords;
shader_object_[color_name_position_dwords] = new_offset;
}
}
new_offset += AppendString(shader_object_, "SV_Target");
if (writes_depth_) {
uint32_t depth_name_position_dwords = chunk_position_dwords +
@ -11179,7 +11258,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
D3D10_SB_TOKENIZED_PROGRAM_TYPE program_type =
is_vertex_shader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER;
IsDXBCVertexShader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER;
shader_object_.push_back(
ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(program_type, 5, 1));
// Reserve space for the length token.
@ -11333,7 +11412,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
// Unordered access views.
if (is_pixel_shader() && edram_rov_used_) {
if (IsDXBCPixelShader() && edram_rov_used_) {
// EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0).
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
@ -11355,7 +11434,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
// Inputs and outputs.
if (is_vertex_shader()) {
if (IsDXBCVertexShader()) {
// Unswapped vertex index input (only X component).
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV) |
@ -11392,8 +11471,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(kVSOutPositionRegister);
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
++stat_.dcl_count;
} else if (is_pixel_shader()) {
} else if (IsDXBCPixelShader()) {
// Interpolator input.
if (!is_depth_only_pixel_shader_) {
uint32_t interpolator_count =
std::min(kInterpolatorCount, register_count());
for (uint32_t i = 0; i < interpolator_count; ++i) {
@ -11417,6 +11497,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0011, 1));
shader_object_.push_back(kPSInPointParametersRegister);
++stat_.dcl_count;
}
// Position input (only XY needed for ps_param_gen, but for ROV access, XYZ
// are needed).
shader_object_.push_back(
@ -11429,30 +11510,36 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(kPSInPositionRegister);
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
++stat_.dcl_count;
if (!is_depth_only_pixel_shader_) {
// Is front face.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4) |
// This needs to be set according to FXC output, despite the description
// in d3d12TokenizedProgramFormat.hpp saying bits 11:23 are ignored.
// This needs to be set according to FXC output, despite the
// description in d3d12TokenizedProgramFormat.hpp saying bits 11:23
// are ignored.
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
D3D10_SB_INTERPOLATION_CONSTANT));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0001, 1));
shader_object_.push_back(kPSInFrontFaceRegister);
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
shader_object_.push_back(
ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
++stat_.dcl_count;
}
if (!edram_rov_used_) {
if (!is_depth_only_pixel_shader_) {
// Color output.
for (uint32_t i = 0; i < 4; ++i) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_object_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_object_.push_back(i);
++stat_.dcl_count;
}
}
// Depth output.
if (writes_depth_) {
shader_object_.push_back(
@ -11468,7 +11555,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Temporary registers - guest general-purpose registers if not using dynamic
// indexing and Xenia internal registers.
stat_.temp_register_count = system_temp_count_max_;
if (!IndexableGPRsUsed()) {
if (!is_depth_only_pixel_shader_ && !IndexableGPRsUsed()) {
stat_.temp_register_count += register_count();
}
if (stat_.temp_register_count != 0) {
@ -11479,7 +11566,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
// General-purpose registers if using dynamic indexing (x0).
if (IndexableGPRsUsed()) {
if (!is_depth_only_pixel_shader_ && IndexableGPRsUsed()) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
@ -11493,7 +11580,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Initialize the depth output if used, which must be initialized on every
// execution path.
if (is_pixel_shader() && writes_depth_) {
if (!edram_rov_used_ && IsDXBCPixelShader() && writes_depth_) {
shader_object_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_object_.push_back(

View File

@ -376,6 +376,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
static bool GetBlendConstants(uint32_t blend_control, uint32_t& blend_x_out,
uint32_t& blend_y_out);
// Creates a special pixel shader without color outputs - this resets the
// state of the translator.
std::vector<uint8_t> CreateDepthOnlyPixelShader();
protected:
void Reset() override;
@ -603,6 +607,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
(index_representation_1 << 25) | (index_representation_2 << 28);
}
// Use these instead of is_vertex_shader/is_pixel_shader, which don't take
// is_depth_only_pixel_shader_ into account.
// Whether the DXBC being written is a vertex shader. Always false while the
// depth-only pixel shader is being written; is_vertex_shader() is not
// consulted in that case.
inline bool IsDXBCVertexShader() const {
  if (is_depth_only_pixel_shader_) {
    return false;
  }
  return is_vertex_shader();
}
// Whether the DXBC being written is a pixel shader. Always true while the
// depth-only pixel shader is being written; is_pixel_shader() is not
// consulted in that case.
inline bool IsDXBCPixelShader() const {
  if (is_depth_only_pixel_shader_) {
    return true;
  }
  return is_pixel_shader();
}
// Allocates a new r# register for internal use and returns its index.
uint32_t PushSystemTemp(bool zero = false);
// Frees the last allocated internal r# registers for later reuse.
@ -783,6 +796,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether the output merger should be emulated in pixel shaders.
bool edram_rov_used_;
// Is currently writing the empty depth-only pixel shader, for
// CompleteTranslation.
bool is_depth_only_pixel_shader_;
// Data types used in constants buffers. Listed in dependency order.
enum class RdefTypeIndex {
kFloat,