[D3D12] ROV: Empty pixel shader for depth-only drawing
This commit is contained in:
parent
6e9964b43e
commit
2d56c9ae30
|
@ -15,6 +15,7 @@
|
|||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
@ -42,6 +43,11 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
|||
edram_rov_used_(edram_rov_used) {
|
||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
|
||||
|
||||
if (edram_rov_used_) {
|
||||
depth_only_pixel_shader_ =
|
||||
std::move(shader_translator_->CreateDepthOnlyPixelShader());
|
||||
}
|
||||
|
||||
// Set pipeline state description values we never change.
|
||||
// Zero out tessellation, stream output, blend state and formats for render
|
||||
// targets 4+, node mask, cached PSO, flags and other things.
|
||||
|
@ -324,10 +330,15 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
if (pixel_shader != nullptr) {
|
||||
update_desc_.PS.pShaderBytecode = pixel_shader->translated_binary().data();
|
||||
update_desc_.PS.BytecodeLength = pixel_shader->translated_binary().size();
|
||||
} else {
|
||||
if (edram_rov_used_) {
|
||||
update_desc_.PS.pShaderBytecode = depth_only_pixel_shader_.data();
|
||||
update_desc_.PS.BytecodeLength = depth_only_pixel_shader_.size();
|
||||
} else {
|
||||
update_desc_.PS.pShaderBytecode = nullptr;
|
||||
update_desc_.PS.BytecodeLength = 0;
|
||||
}
|
||||
}
|
||||
switch (primitive_type) {
|
||||
case PrimitiveType::kPointList:
|
||||
update_desc_.GS.pShaderBytecode = primitive_point_list_gs;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
|
@ -93,6 +94,10 @@ class PipelineCache {
|
|||
// All loaded shaders mapped by their guest hash key.
|
||||
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
|
||||
|
||||
// Empty depth-only pixel shader for writing to depth buffer via ROV when no
|
||||
// Xenos pixel shader provided.
|
||||
std::vector<uint8_t> depth_only_pixel_shader_;
|
||||
|
||||
// Hash state used to incrementally produce pipeline hashes during update.
|
||||
// By the time the full update pass has run the hash will represent the
|
||||
// current state in a way that can uniquely identify the produced
|
||||
|
|
|
@ -450,11 +450,20 @@ bool DxbcShaderTranslator::GetBlendConstants(uint32_t blend_control,
|
|||
return (blend_control & 0x1FFF1FFF) != 0x00010001;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
|
||||
Reset();
|
||||
is_depth_only_pixel_shader_ = true;
|
||||
StartTranslation();
|
||||
return std::move(CompleteTranslation());
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::Reset() {
|
||||
ShaderTranslator::Reset();
|
||||
|
||||
shader_code_.clear();
|
||||
|
||||
is_depth_only_pixel_shader_ = false;
|
||||
|
||||
cbuffer_count_ = 0;
|
||||
// System constants always used in prologues/epilogues.
|
||||
cbuffer_index_system_constants_ = cbuffer_count_++;
|
||||
|
@ -484,8 +493,11 @@ void DxbcShaderTranslator::Reset() {
|
|||
|
||||
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
|
||||
uint32_t register_index = system_temp_count_current_;
|
||||
if (!IndexableGPRsUsed()) {
|
||||
// Guest shader registers first if they're not in x0.
|
||||
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
|
||||
// Guest shader registers first if they're not in x0. Depth-only pixel
|
||||
// shader is a special case of the DXBC translator usage, where there are no
|
||||
// GPRs because there's no shader to translate, and a guest shader is not
|
||||
// loaded.
|
||||
register_index += register_count();
|
||||
}
|
||||
++system_temp_count_current_;
|
||||
|
@ -842,6 +854,11 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
++stat_.mov_instruction_count;
|
||||
}
|
||||
|
||||
// If not translating anything, we only need the depth.
|
||||
if (is_depth_only_pixel_shader_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy interpolants to GPRs.
|
||||
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
|
||||
if (IndexableGPRsUsed()) {
|
||||
|
@ -1039,17 +1056,20 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
void DxbcShaderTranslator::StartTranslation() {
|
||||
// Allocate global system temporary registers that may also be used in the
|
||||
// epilogue.
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
system_temp_position_ = PushSystemTemp(true);
|
||||
} else if (is_pixel_shader()) {
|
||||
} else if (IsDXBCPixelShader()) {
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
system_temp_color_[i] = PushSystemTemp(true);
|
||||
}
|
||||
}
|
||||
if (edram_rov_used_) {
|
||||
system_temp_depth_ = PushSystemTemp();
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Allocate system temporary variables for the translated code.
|
||||
system_temp_pv_ = PushSystemTemp(true);
|
||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
||||
|
@ -1057,14 +1077,20 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
system_temp_loop_count_ = PushSystemTemp(true);
|
||||
system_temp_grad_h_lod_ = PushSystemTemp(true);
|
||||
system_temp_grad_v_ = PushSystemTemp(true);
|
||||
}
|
||||
|
||||
// Write stage-specific prologue.
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
StartVertexShader();
|
||||
} else if (is_pixel_shader()) {
|
||||
} else if (IsDXBCPixelShader()) {
|
||||
StartPixelShader();
|
||||
}
|
||||
|
||||
// If not translating anything, don't start the main loop.
|
||||
if (is_depth_only_pixel_shader_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Start the main loop (for jumping to labels by setting pc and continuing).
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LOOP) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
|
@ -3286,6 +3312,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
|
||||
// Calculate the address in the EDRAM buffer.
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// 1a) Get dword offset within the tile to edram_coord_low_temp.x.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
|
@ -3303,6 +3330,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(edram_coord_low_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
}
|
||||
|
||||
// 1b) Do the same for depth/stencil to system_temp_depth_.w.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||
|
@ -3322,6 +3350,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// 2a) Combine the tile offset and the offset within the tile to
|
||||
// edram_coord_low_temp.x.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||
|
@ -3340,6 +3369,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(edram_coord_low_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
}
|
||||
|
||||
// 2b) Do the same for depth/stencil to system_temp_depth_.w.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||
|
@ -3359,9 +3389,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Adjust the offsets for 64 bits per pixel.
|
||||
// Adjust the offsets for 64 bits per pixel, and add EDRAM bases of color
|
||||
// render targets.
|
||||
|
||||
uint32_t edram_coord_high_temp = PushSystemTemp();
|
||||
uint32_t edram_coord_high_temp = 0;
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
edram_coord_high_temp = PushSystemTemp();
|
||||
|
||||
// Get which render targets are 64bpp, as log2 of dword count per pixel.
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index;
|
||||
|
@ -3391,11 +3425,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.movc_instruction_count;
|
||||
|
||||
// Multiply the offsets by 1 or 2 depending on the number of bits per pixel.
|
||||
// It's okay to do this here because everything in the equation (at least for
|
||||
// Xenia's representation of the EDRAM - may not be true on the real console)
|
||||
// needs to be multiplied by 2 - Y tile index (the same as multiplying the
|
||||
// pitch by 2), X tile index (it addresses pairs of tiles in this case), and
|
||||
// the offset within a pair of tiles.
|
||||
// It's okay to do this here because everything in the equation (at least
|
||||
// for Xenia's representation of the EDRAM - may not be true on the real
|
||||
// console) needs to be multiplied by 2 - Y tile index (the same as
|
||||
// multiplying the pitch by 2), X tile index (it addresses pairs of tiles in
|
||||
// this case), and the offset within a pair of tiles.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
|
@ -3404,8 +3438,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_high_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
@ -3417,8 +3451,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||
|
@ -3428,6 +3462,22 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// Get the offsets of the upper 32 bits.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(edram_coord_high_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_high_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
}
|
||||
|
||||
// Add the EDRAM base for depth.
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMDepthBaseDwords_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
|
@ -3447,21 +3497,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// Get the offsets of the upper 32 bits.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(edram_coord_high_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_high_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// ***************************************************************************
|
||||
// Do depth/stencil testing. This must be done before the color writing, so
|
||||
// discard happens before the write, and also because in case the EDRAM base
|
||||
|
@ -3742,6 +3777,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
// Write to color render targets.
|
||||
// ***************************************************************************
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||
|
||||
// Get what render targets need to be written to.
|
||||
|
@ -3836,8 +3872,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
uint32_t dest_color_temp = PushSystemTemp();
|
||||
CompletePixelShader_WriteToROV_LoadColor(
|
||||
edram_coord_low_temp, edram_coord_high_temp, rt_index, dest_color_temp);
|
||||
CompletePixelShader_WriteToROV_LoadColor(edram_coord_low_temp,
|
||||
edram_coord_high_temp, rt_index,
|
||||
dest_color_temp);
|
||||
|
||||
// Blend if needed.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
|
@ -3849,9 +3886,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(rt_blend_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
CompletePixelShader_WriteToROV_Blend(rt_index, system_temp_color_[rt_index],
|
||||
dest_color_temp);
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
CompletePixelShader_WriteToROV_Blend(
|
||||
rt_index, system_temp_color_[rt_index], dest_color_temp);
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
|
@ -3859,7 +3897,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
|
||||
// Release dest_color_temp.
|
||||
PopSystemTemp();
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
|
@ -3871,17 +3910,30 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
system_temp_color_[rt_index]);
|
||||
|
||||
// Close the check whether the RT is used.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
}
|
||||
|
||||
// Release edram_coord_low_temp, edram_coord_high_temp, rt_used_temp,
|
||||
// rt_load_temp and rt_blend_temp.
|
||||
PopSystemTemp(5);
|
||||
// Release rt_used_temp, rt_load_temp and rt_blend_temp.
|
||||
PopSystemTemp(3);
|
||||
}
|
||||
|
||||
// Release edram_coord_low_temp and, if used, edram_coord_high_temp.
|
||||
PopSystemTemp(is_depth_only_pixel_shader_ ? 1 : 2);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader() {
|
||||
if (is_depth_only_pixel_shader_) {
|
||||
// The depth-only shader only needs to do the depth test and to write the
|
||||
// depth to the ROV.
|
||||
if (edram_rov_used_) {
|
||||
CompletePixelShader_WriteToROV();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Alpha test.
|
||||
// Check if alpha test is enabled (if the constant is not 0).
|
||||
system_constants_used_ |= (1ull << kSysConst_AlphaTest_Index) |
|
||||
|
@ -4125,9 +4177,11 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::CompleteShaderCode() {
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Close the last label and the switch.
|
||||
if (FLAGS_dxbc_switch) {
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
shader_code_.push_back(
|
||||
|
@ -4135,7 +4189,8 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
} else {
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
}
|
||||
|
@ -4143,7 +4198,8 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDLOOP) |
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDLOOP) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
|
@ -4155,25 +4211,28 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
// - system_temp_grad_h_lod_.
|
||||
// - system_temp_grad_v_.
|
||||
PopSystemTemp(6);
|
||||
}
|
||||
|
||||
// Write stage-specific epilogue.
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
CompleteVertexShader();
|
||||
} else if (is_pixel_shader()) {
|
||||
} else if (IsDXBCPixelShader()) {
|
||||
CompletePixelShader();
|
||||
}
|
||||
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
// Release system_temp_position_.
|
||||
PopSystemTemp();
|
||||
} else if (is_pixel_shader()) {
|
||||
} else if (IsDXBCPixelShader()) {
|
||||
if (edram_rov_used_) {
|
||||
// Release system_temp_depth_.
|
||||
PopSystemTemp();
|
||||
}
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Release system_temp_color_.
|
||||
PopSystemTemp(4);
|
||||
}
|
||||
}
|
||||
|
||||
// Return from `main`.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RET) |
|
||||
|
@ -6802,7 +6861,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
uint32_t tfetch_pair_offset = (tfetch_index >> 1) * 3;
|
||||
|
||||
// TODO(Triang3l): kGetTextureBorderColorFrac.
|
||||
if (!is_pixel_shader() &&
|
||||
if (!IsDXBCPixelShader() &&
|
||||
(instr.opcode == FetchOpcode::kGetTextureComputedLod ||
|
||||
instr.opcode == FetchOpcode::kGetTextureGradients)) {
|
||||
// Quickly skip everything if tried to get anything involving derivatives
|
||||
|
@ -7403,7 +7462,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
|
||||
// The non-pixel-shader case should be handled before because it
|
||||
// just returns a constant in this case.
|
||||
assert_true(is_pixel_shader());
|
||||
assert_true(IsDXBCPixelShader());
|
||||
replicate_result = true;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
|
||||
|
@ -7508,7 +7567,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Both sample_l and sample_b should add the LOD bias as the last
|
||||
// operand in our case.
|
||||
bool explicit_lod =
|
||||
!instr.attributes.use_computed_lod || !is_pixel_shader();
|
||||
!instr.attributes.use_computed_lod || !IsDXBCPixelShader();
|
||||
if (explicit_lod) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
|
||||
|
@ -7887,7 +7946,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Release coord_temp.
|
||||
PopSystemTemp();
|
||||
} else if (instr.opcode == FetchOpcode::kGetTextureGradients) {
|
||||
assert_true(is_pixel_shader());
|
||||
assert_true(IsDXBCPixelShader());
|
||||
store_result = true;
|
||||
// pv.xz = ddx(coord.xy)
|
||||
shader_code_.push_back(
|
||||
|
@ -10477,22 +10536,25 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// Constant buffer offset (set later).
|
||||
shader_object_.push_back(0);
|
||||
// Bound resource count (samplers, SRV, UAV, CBV).
|
||||
uint32_t resource_count = cbuffer_count_;
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// + 1 for shared memory (vfetches can probably appear in pixel shaders too,
|
||||
// they are handled safely there anyway).
|
||||
uint32_t resource_count = uint32_t(sampler_bindings_.size()) + 1 +
|
||||
uint32_t(texture_srvs_.size()) + cbuffer_count_;
|
||||
if (is_pixel_shader() && edram_rov_used_) {
|
||||
resource_count +=
|
||||
uint32_t(sampler_bindings_.size()) + 1 + uint32_t(texture_srvs_.size());
|
||||
}
|
||||
if (IsDXBCPixelShader() && edram_rov_used_) {
|
||||
// EDRAM.
|
||||
++resource_count;
|
||||
}
|
||||
shader_object_.push_back(resource_count);
|
||||
// Bound resource buffer offset (set later).
|
||||
shader_object_.push_back(0);
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
// vs_5_1
|
||||
shader_object_.push_back(0xFFFE0501u);
|
||||
} else {
|
||||
assert_true(is_pixel_shader());
|
||||
assert_true(IsDXBCPixelShader());
|
||||
// ps_5_1
|
||||
shader_object_.push_back(0xFFFF0501u);
|
||||
}
|
||||
|
@ -10787,25 +10849,31 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// their names already.
|
||||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
uint32_t sampler_name_offset = new_offset;
|
||||
uint32_t sampler_name_offset = 0;
|
||||
uint32_t shared_memory_name_offset = 0;
|
||||
uint32_t texture_name_offset = 0;
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
sampler_name_offset = new_offset;
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
new_offset +=
|
||||
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
|
||||
}
|
||||
uint32_t shared_memory_name_offset = new_offset;
|
||||
shared_memory_name_offset = new_offset;
|
||||
new_offset += AppendString(shader_object_, "xe_shared_memory");
|
||||
uint32_t texture_name_offset = new_offset;
|
||||
texture_name_offset = new_offset;
|
||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
|
||||
}
|
||||
}
|
||||
uint32_t edram_name_offset = new_offset;
|
||||
if (is_pixel_shader() && edram_rov_used_) {
|
||||
if (IsDXBCPixelShader() && edram_rov_used_) {
|
||||
new_offset += AppendString(shader_object_, "xe_edram");
|
||||
}
|
||||
|
||||
// Write the offset to the header.
|
||||
shader_object_[chunk_position_dwords + 3] = new_offset;
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Samplers.
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
const SamplerBinding& sampler_binding = sampler_bindings_[i];
|
||||
|
@ -10886,8 +10954,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
shader_object_.push_back(1 + i);
|
||||
texture_name_offset += GetStringLength(texture_srv.name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (is_pixel_shader() && edram_rov_used_) {
|
||||
if (IsDXBCPixelShader() && edram_rov_used_) {
|
||||
// EDRAM uint32 buffer.
|
||||
shader_object_.push_back(edram_name_offset);
|
||||
// D3D_SIT_UAV_RWTYPED.
|
||||
|
@ -10954,7 +11023,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
const uint32_t signature_position_dwords = 2;
|
||||
const uint32_t signature_size_dwords = 6;
|
||||
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
// Only unswapped vertex index.
|
||||
shader_object_.push_back(1);
|
||||
// Unknown.
|
||||
|
@ -10977,7 +11046,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
// Vertex index semantic name.
|
||||
AppendString(shader_object_, "SV_VertexID");
|
||||
} else {
|
||||
assert_true(is_pixel_shader());
|
||||
assert_true(IsDXBCPixelShader());
|
||||
// Interpolators, point parameters (coordinates, size), screen position,
|
||||
// is front face.
|
||||
shader_object_.push_back(kInterpolatorCount + 3);
|
||||
|
@ -10996,7 +11065,10 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
shader_object_.push_back(kPSInInterpolatorRegister + i);
|
||||
// Interpolators are copied to GPRs in the beginning of the shader. If
|
||||
// there's a register to copy to, this interpolator is used.
|
||||
shader_object_.push_back(0xF | (i < register_count() ? (0xF << 8) : 0));
|
||||
uint32_t interpolator_used =
|
||||
(!is_depth_only_pixel_shader_ && i < register_count()) ? (0xF << 8)
|
||||
: 0;
|
||||
shader_object_.push_back(0xF | interpolator_used);
|
||||
}
|
||||
|
||||
// Point parameters - coordinate on the point and point size as a float3
|
||||
|
@ -11007,7 +11079,8 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(3);
|
||||
shader_object_.push_back(kPSInPointParametersRegister);
|
||||
shader_object_.push_back(0x7 | (0x3 << 8));
|
||||
shader_object_.push_back(0x7 |
|
||||
(is_depth_only_pixel_shader_ ? 0 : (0x3 << 8)));
|
||||
|
||||
// Position (only XY needed for ps_param_gen, but XYZ needed for ROV).
|
||||
// Always used because ps_param_gen is handled dynamically and because this
|
||||
|
@ -11027,7 +11100,8 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
shader_object_.push_back(9);
|
||||
shader_object_.push_back(1);
|
||||
shader_object_.push_back(kPSInFrontFaceRegister);
|
||||
shader_object_.push_back(0x1 | (0x1 << 8));
|
||||
shader_object_.push_back(0x1 |
|
||||
(is_depth_only_pixel_shader_ ? 0 : (0x1 << 8)));
|
||||
|
||||
// Write the semantic names.
|
||||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
|
@ -11060,7 +11134,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
const uint32_t signature_position_dwords = 2;
|
||||
const uint32_t signature_size_dwords = 6;
|
||||
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
// Interpolators, point parameters (coordinates, size), screen position.
|
||||
shader_object_.push_back(kInterpolatorCount + 2);
|
||||
// Unknown.
|
||||
|
@ -11116,7 +11190,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
shader_object_[position_name_position_dwords] = new_offset;
|
||||
new_offset += AppendString(shader_object_, "SV_Position");
|
||||
} else {
|
||||
assert_true(is_pixel_shader());
|
||||
assert_true(IsDXBCPixelShader());
|
||||
if (edram_rov_used_) {
|
||||
// No outputs - only ROV read/write.
|
||||
shader_object_.push_back(0);
|
||||
|
@ -11124,11 +11198,13 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
shader_object_.push_back(8);
|
||||
} else {
|
||||
// Color render targets, optionally depth.
|
||||
shader_object_.push_back(4 + (writes_depth_ ? 1 : 0));
|
||||
shader_object_.push_back((is_depth_only_pixel_shader_ ? 0 : 4) +
|
||||
(writes_depth_ ? 1 : 0));
|
||||
// Unknown.
|
||||
shader_object_.push_back(8);
|
||||
|
||||
// Color render targets.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// Reserve space for the semantic name (SV_Target).
|
||||
shader_object_.push_back(0);
|
||||
|
@ -11142,6 +11218,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
// to make the indices consecutive.
|
||||
shader_object_.push_back(0xF);
|
||||
}
|
||||
}
|
||||
|
||||
// Depth.
|
||||
if (writes_depth_) {
|
||||
|
@ -11157,12 +11234,14 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
// Write the semantic names.
|
||||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_name_position_dwords = chunk_position_dwords +
|
||||
signature_position_dwords +
|
||||
i * signature_size_dwords;
|
||||
shader_object_[color_name_position_dwords] = new_offset;
|
||||
}
|
||||
}
|
||||
new_offset += AppendString(shader_object_, "SV_Target");
|
||||
if (writes_depth_) {
|
||||
uint32_t depth_name_position_dwords = chunk_position_dwords +
|
||||
|
@ -11179,7 +11258,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
|
||||
|
||||
D3D10_SB_TOKENIZED_PROGRAM_TYPE program_type =
|
||||
is_vertex_shader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER;
|
||||
IsDXBCVertexShader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER;
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(program_type, 5, 1));
|
||||
// Reserve space for the length token.
|
||||
|
@ -11333,7 +11412,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// Unordered access views.
|
||||
if (is_pixel_shader() && edram_rov_used_) {
|
||||
if (IsDXBCPixelShader() && edram_rov_used_) {
|
||||
// EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0).
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
|
@ -11355,7 +11434,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// Inputs and outputs.
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDXBCVertexShader()) {
|
||||
// Unswapped vertex index input (only X component).
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV) |
|
||||
|
@ -11392,8 +11471,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(kVSOutPositionRegister);
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
|
||||
++stat_.dcl_count;
|
||||
} else if (is_pixel_shader()) {
|
||||
} else if (IsDXBCPixelShader()) {
|
||||
// Interpolator input.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
uint32_t interpolator_count =
|
||||
std::min(kInterpolatorCount, register_count());
|
||||
for (uint32_t i = 0; i < interpolator_count; ++i) {
|
||||
|
@ -11417,6 +11497,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0011, 1));
|
||||
shader_object_.push_back(kPSInPointParametersRegister);
|
||||
++stat_.dcl_count;
|
||||
}
|
||||
// Position input (only XY needed for ps_param_gen, but for ROV access, XYZ
|
||||
// are needed).
|
||||
shader_object_.push_back(
|
||||
|
@ -11429,30 +11510,36 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(kPSInPositionRegister);
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
|
||||
++stat_.dcl_count;
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Is front face.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4) |
|
||||
// This needs to be set according to FXC output, despite the description
|
||||
// in d3d12TokenizedProgramFormat.hpp saying bits 11:23 are ignored.
|
||||
// This needs to be set according to FXC output, despite the
|
||||
// description in d3d12TokenizedProgramFormat.hpp saying bits 11:23
|
||||
// are ignored.
|
||||
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
|
||||
D3D10_SB_INTERPOLATION_CONSTANT));
|
||||
shader_object_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0001, 1));
|
||||
shader_object_.push_back(kPSInFrontFaceRegister);
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
|
||||
++stat_.dcl_count;
|
||||
}
|
||||
if (!edram_rov_used_) {
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Color output.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_object_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
||||
shader_object_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
||||
shader_object_.push_back(i);
|
||||
++stat_.dcl_count;
|
||||
}
|
||||
}
|
||||
// Depth output.
|
||||
if (writes_depth_) {
|
||||
shader_object_.push_back(
|
||||
|
@ -11468,7 +11555,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// Temporary registers - guest general-purpose registers if not using dynamic
|
||||
// indexing and Xenia internal registers.
|
||||
stat_.temp_register_count = system_temp_count_max_;
|
||||
if (!IndexableGPRsUsed()) {
|
||||
if (!is_depth_only_pixel_shader_ && !IndexableGPRsUsed()) {
|
||||
stat_.temp_register_count += register_count();
|
||||
}
|
||||
if (stat_.temp_register_count != 0) {
|
||||
|
@ -11479,7 +11566,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// General-purpose registers if using dynamic indexing (x0).
|
||||
if (IndexableGPRsUsed()) {
|
||||
if (!is_depth_only_pixel_shader_ && IndexableGPRsUsed()) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
|
@ -11493,7 +11580,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
|
||||
// Initialize the depth output if used, which must be initialized on every
|
||||
// execution path.
|
||||
if (is_pixel_shader() && writes_depth_) {
|
||||
if (!edram_rov_used_ && IsDXBCPixelShader() && writes_depth_) {
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
shader_object_.push_back(
|
||||
|
|
|
@ -376,6 +376,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
static bool GetBlendConstants(uint32_t blend_control, uint32_t& blend_x_out,
|
||||
uint32_t& blend_y_out);
|
||||
|
||||
// Creates a special pixel shader without color outputs - this resets the
|
||||
// state of the translator.
|
||||
std::vector<uint8_t> CreateDepthOnlyPixelShader();
|
||||
|
||||
protected:
|
||||
void Reset() override;
|
||||
|
||||
|
@ -603,6 +607,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
(index_representation_1 << 25) | (index_representation_2 << 28);
|
||||
}
|
||||
|
||||
// Use these instead of is_vertex_shader/is_pixel_shader, which don't take
|
||||
// is_depth_only_pixel_shader_ into account.
|
||||
inline bool IsDXBCVertexShader() const {
|
||||
return !is_depth_only_pixel_shader_ && is_vertex_shader();
|
||||
}
|
||||
inline bool IsDXBCPixelShader() const {
|
||||
return is_depth_only_pixel_shader_ || is_pixel_shader();
|
||||
}
|
||||
|
||||
// Allocates a new r# register for internal use and returns its index.
|
||||
uint32_t PushSystemTemp(bool zero = false);
|
||||
// Frees the last allocated internal r# registers for later reuse.
|
||||
|
@ -783,6 +796,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Whether the output merger should be emulated in pixel shaders.
|
||||
bool edram_rov_used_;
|
||||
|
||||
// Whether the empty depth-only pixel shader is currently being written, for
|
||||
// CompleteTranslation.
|
||||
bool is_depth_only_pixel_shader_;
|
||||
|
||||
// Data types used in constants buffers. Listed in dependency order.
|
||||
enum class RdefTypeIndex {
|
||||
kFloat,
|
||||
|
|
Loading…
Reference in New Issue