[D3D12] Force early Z with DSV, fix blend disabled flag in rb_colorcontrol ignored
This commit is contained in:
parent
d7ed044be1
commit
ef523823d5
|
@ -2199,8 +2199,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
|
|
||||||
// Alpha test.
|
// Alpha test.
|
||||||
int32_t alpha_test;
|
int32_t alpha_test;
|
||||||
if (rb_colorcontrol & 0x8) {
|
uint32_t alpha_test_function = rb_colorcontrol & 0x7;
|
||||||
uint32_t alpha_test_function = rb_colorcontrol & 0x7;
|
if ((rb_colorcontrol & 0x8) && alpha_test_function != 0x7) {
|
||||||
// 0: Never - fail in [-inf, +inf].
|
// 0: Never - fail in [-inf, +inf].
|
||||||
// 1: Less - fail in [ref, +inf].
|
// 1: Less - fail in [ref, +inf].
|
||||||
// 2: Equal - pass in [ref, ref].
|
// 2: Equal - pass in [ref, ref].
|
||||||
|
|
|
@ -40,6 +40,17 @@ class D3D12Shader : public Shader {
|
||||||
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
|
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
|
||||||
uint32_t sampler_binding_count);
|
uint32_t sampler_binding_count);
|
||||||
|
|
||||||
|
// Stores a copy of the given shader object bytes as the version of this shader
// with forced early depth/stencil (later returned by
// GetForcedEarlyZShaderObject).
void SetForcedEarlyZShaderObject(const std::vector<uint8_t>& shader_object) {
|
||||||
|
forced_early_z_shader_ = shader_object;
|
||||||
|
}
|
||||||
|
// Returns the shader with forced early depth/stencil set with
|
||||||
|
// SetForcedEarlyZShaderObject after translation. If there's none (for
|
||||||
|
// example, if the shader discards pixels or writes to the depth buffer),
|
||||||
|
// an empty vector is returned.
|
||||||
|
const std::vector<uint8_t>& GetForcedEarlyZShaderObject() const {
|
||||||
|
return forced_early_z_shader_;
|
||||||
|
}
|
||||||
|
|
||||||
bool DisassembleDxbc(const ui::d3d12::D3D12Provider* provider);
|
bool DisassembleDxbc(const ui::d3d12::D3D12Provider* provider);
|
||||||
|
|
||||||
static constexpr uint32_t kMaxTextureSRVIndexBits =
|
static constexpr uint32_t kMaxTextureSRVIndexBits =
|
||||||
|
@ -78,9 +89,12 @@ class D3D12Shader : public Shader {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
PrimitiveType domain_shader_primitive_type_ = PrimitiveType::kNone;
|
PrimitiveType domain_shader_primitive_type_ = PrimitiveType::kNone;
|
||||||
|
|
||||||
std::vector<TextureSRV> texture_srvs_;
|
std::vector<TextureSRV> texture_srvs_;
|
||||||
uint32_t used_texture_mask_ = 0;
|
uint32_t used_texture_mask_ = 0;
|
||||||
std::vector<SamplerBinding> sampler_bindings_;
|
std::vector<SamplerBinding> sampler_bindings_;
|
||||||
|
|
||||||
|
std::vector<uint8_t> forced_early_z_shader_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -331,6 +331,15 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
|
||||||
shader->ucode_disassembly().c_str());
|
shader->ucode_disassembly().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If it may be useful, create a version of the shader with early depth/stencil
|
||||||
|
// forced.
|
||||||
|
if (shader->type() == ShaderType::kPixel && !edram_rov_used_ &&
|
||||||
|
shader->early_z_allowed()) {
|
||||||
|
shader->SetForcedEarlyZShaderObject(
|
||||||
|
std::move(DxbcShaderTranslator::ForceEarlyDepthStencil(
|
||||||
|
shader->translated_binary().data())));
|
||||||
|
}
|
||||||
|
|
||||||
// Disassemble the shader for dumping.
|
// Disassemble the shader for dumping.
|
||||||
if (FLAGS_d3d12_dxbc_disasm) {
|
if (FLAGS_d3d12_dxbc_disasm) {
|
||||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
|
@ -569,6 +578,8 @@ bool PipelineCache::GetCurrentStateDescription(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!edram_rov_used_) {
|
if (!edram_rov_used_) {
|
||||||
|
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||||
|
|
||||||
// Depth/stencil. No stencil, always passing depth test and no depth writing
|
// Depth/stencil. No stencil, always passing depth test and no depth writing
|
||||||
// means depth disabled.
|
// means depth disabled.
|
||||||
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
|
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
|
||||||
|
@ -616,6 +627,16 @@ bool PipelineCache::GetCurrentStateDescription(
|
||||||
description_out.depth_func = 0b111;
|
description_out.depth_func = 0b111;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forced early Z if the shader allows that and alpha testing is disabled.
|
||||||
|
// TODO(Triang3l): For memexporting shaders, possibly choose this according
|
||||||
|
// to the early Z toggle in RB_DEPTHCONTROL (the correct behavior is still
|
||||||
|
// unknown).
|
||||||
|
if (pixel_shader != nullptr &&
|
||||||
|
pixel_shader->GetForcedEarlyZShaderObject().size() != 0 &&
|
||||||
|
(!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7)) {
|
||||||
|
description_out.force_early_z = 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Render targets and blending state. 32 because of 0x1F mask, for safety
|
// Render targets and blending state. 32 because of 0x1F mask, for safety
|
||||||
// (all unknown to zero).
|
// (all unknown to zero).
|
||||||
uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader);
|
uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader);
|
||||||
|
@ -695,7 +716,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
||||||
rt.format = RenderTargetCache::GetBaseColorFormat(
|
rt.format = RenderTargetCache::GetBaseColorFormat(
|
||||||
ColorRenderTargetFormat((color_info >> 16) & 0xF));
|
ColorRenderTargetFormat((color_info >> 16) & 0xF));
|
||||||
rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF;
|
rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF;
|
||||||
if (rt.write_mask) {
|
if (!(rb_colorcontrol & 0x20) && rt.write_mask) {
|
||||||
rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F];
|
rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F];
|
||||||
rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F];
|
rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F];
|
||||||
rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7);
|
rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7);
|
||||||
|
@ -874,10 +895,17 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
|
||||||
assert_always();
|
assert_always();
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
state_desc.PS.pShaderBytecode =
|
const auto& forced_early_z_shader =
|
||||||
description.pixel_shader->translated_binary().data();
|
description.pixel_shader->GetForcedEarlyZShaderObject();
|
||||||
state_desc.PS.BytecodeLength =
|
if (description.force_early_z && forced_early_z_shader.size() != 0) {
|
||||||
description.pixel_shader->translated_binary().size();
|
state_desc.PS.pShaderBytecode = forced_early_z_shader.data();
|
||||||
|
state_desc.PS.BytecodeLength = forced_early_z_shader.size();
|
||||||
|
} else {
|
||||||
|
state_desc.PS.pShaderBytecode =
|
||||||
|
description.pixel_shader->translated_binary().data();
|
||||||
|
state_desc.PS.BytecodeLength =
|
||||||
|
description.pixel_shader->translated_binary().size();
|
||||||
|
}
|
||||||
} else if (edram_rov_used_) {
|
} else if (edram_rov_used_) {
|
||||||
state_desc.PS.pShaderBytecode = depth_only_pixel_shader_.data();
|
state_desc.PS.pShaderBytecode = depth_only_pixel_shader_.data();
|
||||||
state_desc.PS.BytecodeLength = depth_only_pixel_shader_.size();
|
state_desc.PS.BytecodeLength = depth_only_pixel_shader_.size();
|
||||||
|
|
|
@ -155,6 +155,7 @@ class PipelineCache {
|
||||||
uint32_t depth_write : 1; // 21
|
uint32_t depth_write : 1; // 21
|
||||||
uint32_t stencil_enable : 1; // 22
|
uint32_t stencil_enable : 1; // 22
|
||||||
uint32_t stencil_read_mask : 8; // 30
|
uint32_t stencil_read_mask : 8; // 30
|
||||||
|
uint32_t force_early_z : 1; // 31
|
||||||
|
|
||||||
uint32_t stencil_write_mask : 8; // 8
|
uint32_t stencil_write_mask : 8; // 8
|
||||||
uint32_t stencil_front_fail_op : 3; // 11
|
uint32_t stencil_front_fail_op : 3; // 11
|
||||||
|
|
|
@ -89,6 +89,55 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
||||||
}
|
}
|
||||||
DxbcShaderTranslator::~DxbcShaderTranslator() = default;
|
DxbcShaderTranslator::~DxbcShaderTranslator() = default;
|
||||||
|
|
||||||
|
std::vector<uint8_t> DxbcShaderTranslator::ForceEarlyDepthStencil(
|
||||||
|
const uint8_t* shader) {
|
||||||
|
const uint32_t* old_shader = reinterpret_cast<const uint32_t*>(shader);
|
||||||
|
|
||||||
|
// To return something anyway even if patching fails.
|
||||||
|
std::vector<uint8_t> new_shader;
|
||||||
|
uint32_t shader_size_bytes = old_shader[6];
|
||||||
|
new_shader.resize(shader_size_bytes);
|
||||||
|
std::memcpy(new_shader.data(), shader, shader_size_bytes);
|
||||||
|
|
||||||
|
// Find the SHEX chunk.
|
||||||
|
uint32_t chunk_count = old_shader[7];
|
||||||
|
for (uint32_t i = 0; i < chunk_count; ++i) {
|
||||||
|
uint32_t chunk_offset_bytes = old_shader[8 + i];
|
||||||
|
const uint32_t* chunk = old_shader + chunk_offset_bytes / sizeof(uint32_t);
|
||||||
|
if (chunk[0] != 'XEHS') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Find dcl_globalFlags and patch it.
|
||||||
|
uint32_t code_size_dwords = chunk[3];
|
||||||
|
chunk += 4;
|
||||||
|
for (uint32_t j = 0; j < code_size_dwords;) {
|
||||||
|
uint32_t opcode_token = chunk[j];
|
||||||
|
uint32_t opcode = DECODE_D3D10_SB_OPCODE_TYPE(opcode_token);
|
||||||
|
if (opcode == D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) {
|
||||||
|
opcode_token |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL;
|
||||||
|
std::memcpy(new_shader.data() +
|
||||||
|
(chunk_offset_bytes + (4 + j) * sizeof(uint32_t)),
|
||||||
|
&opcode_token, sizeof(uint32_t));
|
||||||
|
// Recalculate the checksum since the shader was modified.
|
||||||
|
CalculateDXBCChecksum(
|
||||||
|
reinterpret_cast<unsigned char*>(new_shader.data()),
|
||||||
|
shader_size_bytes,
|
||||||
|
reinterpret_cast<unsigned int*>(new_shader.data() +
|
||||||
|
sizeof(uint32_t)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (opcode == D3D10_SB_OPCODE_CUSTOMDATA) {
|
||||||
|
j += chunk[j + 1];
|
||||||
|
} else {
|
||||||
|
j += DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(opcode_token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::move(new_shader);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
|
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
|
||||||
Reset();
|
Reset();
|
||||||
is_depth_only_pixel_shader_ = true;
|
is_depth_only_pixel_shader_ = true;
|
||||||
|
@ -4034,7 +4083,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't allow refactoring when converting to native code to maintain position
|
// Don't allow refactoring when converting to native code to maintain position
|
||||||
// invariance (needed even in pixel shaders for oDepth invariance).
|
// invariance (needed even in pixel shaders for oDepth invariance). Also this
|
||||||
|
// dcl will be modified by ForceEarlyDepthStencil.
|
||||||
shader_object_.push_back(
|
shader_object_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
|
|
@ -491,6 +491,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kEDRAM,
|
kEDRAM,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Creates a copy of the shader with early depth/stencil testing forced,
|
||||||
|
// overriding that alpha testing is used in the shader.
|
||||||
|
static std::vector<uint8_t> ForceEarlyDepthStencil(const uint8_t* shader);
|
||||||
|
|
||||||
// Returns the bits that need to be added to the RT flags constant - needs to
|
// Returns the bits that need to be added to the RT flags constant - needs to
|
||||||
// be done externally, not in SetColorFormatConstants, because the flags
|
// be done externally, not in SetColorFormatConstants, because the flags
|
||||||
// contain other state.
|
// contain other state.
|
||||||
|
|
|
@ -607,6 +607,10 @@ class Shader {
|
||||||
// Returns true if the shader writes to the given color target index [0-3].
|
// Returns true if the shader writes to the given color target index [0-3].
|
||||||
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
||||||
|
|
||||||
|
// Returns true if the pixel shader can potentially have early depth/stencil
|
||||||
|
// testing enabled, provided alpha testing is disabled.
|
||||||
|
bool early_z_allowed() const { return early_z_allowed_; }
|
||||||
|
|
||||||
// True if the shader was translated and prepared without error.
|
// True if the shader was translated and prepared without error.
|
||||||
bool is_valid() const { return is_valid_; }
|
bool is_valid() const { return is_valid_; }
|
||||||
|
|
||||||
|
@ -655,6 +659,7 @@ class Shader {
|
||||||
std::vector<TextureBinding> texture_bindings_;
|
std::vector<TextureBinding> texture_bindings_;
|
||||||
ConstantRegisterMap constant_register_map_ = {0};
|
ConstantRegisterMap constant_register_map_ = {0};
|
||||||
bool writes_color_targets_[4] = {false, false, false, false};
|
bool writes_color_targets_[4] = {false, false, false, false};
|
||||||
|
bool early_z_allowed_ = true;
|
||||||
std::vector<uint32_t> memexport_stream_constants_;
|
std::vector<uint32_t> memexport_stream_constants_;
|
||||||
|
|
||||||
bool is_valid_ = false;
|
bool is_valid_ = false;
|
||||||
|
|
|
@ -65,6 +65,7 @@ void ShaderTranslator::Reset() {
|
||||||
writes_color_targets_[i] = false;
|
writes_color_targets_[i] = false;
|
||||||
}
|
}
|
||||||
writes_depth_ = false;
|
writes_depth_ = false;
|
||||||
|
early_z_allowed_ = true;
|
||||||
memexport_alloc_count_ = 0;
|
memexport_alloc_count_ = 0;
|
||||||
memexport_eA_written_ = 0;
|
memexport_eA_written_ = 0;
|
||||||
std::memset(&memexport_eM_written_, 0, sizeof(memexport_eM_written_));
|
std::memset(&memexport_eM_written_, 0, sizeof(memexport_eM_written_));
|
||||||
|
@ -189,6 +190,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader) {
|
||||||
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||||
shader->writes_color_targets_[i] = writes_color_targets_[i];
|
shader->writes_color_targets_[i] = writes_color_targets_[i];
|
||||||
}
|
}
|
||||||
|
shader->early_z_allowed_ = early_z_allowed_;
|
||||||
shader->memexport_stream_constants_.clear();
|
shader->memexport_stream_constants_.clear();
|
||||||
for (uint32_t memexport_stream_constant : memexport_stream_constants_) {
|
for (uint32_t memexport_stream_constant : memexport_stream_constants_) {
|
||||||
shader->memexport_stream_constants_.push_back(memexport_stream_constant);
|
shader->memexport_stream_constants_.push_back(memexport_stream_constant);
|
||||||
|
@ -288,6 +290,7 @@ void ShaderTranslator::GatherInstructionInformation(
|
||||||
if (op.has_vector_op()) {
|
if (op.has_vector_op()) {
|
||||||
const auto& opcode_info =
|
const auto& opcode_info =
|
||||||
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
|
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
|
||||||
|
early_z_allowed_ &= !opcode_info.disable_early_z;
|
||||||
for (size_t i = 0; i < opcode_info.argument_count; ++i) {
|
for (size_t i = 0; i < opcode_info.argument_count; ++i) {
|
||||||
if (op.src_is_temp(i + 1) && (op.src_reg(i + 1) & 0x40)) {
|
if (op.src_is_temp(i + 1) && (op.src_reg(i + 1) & 0x40)) {
|
||||||
uses_register_dynamic_addressing_ = true;
|
uses_register_dynamic_addressing_ = true;
|
||||||
|
@ -299,6 +302,7 @@ void ShaderTranslator::GatherInstructionInformation(
|
||||||
writes_color_targets_[op.vector_dest()] = true;
|
writes_color_targets_[op.vector_dest()] = true;
|
||||||
} else if (op.vector_dest() == 61) {
|
} else if (op.vector_dest() == 61) {
|
||||||
writes_depth_ = true;
|
writes_depth_ = true;
|
||||||
|
early_z_allowed_ = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (memexport_alloc_count_ > 0 &&
|
if (memexport_alloc_count_ > 0 &&
|
||||||
|
@ -335,6 +339,7 @@ void ShaderTranslator::GatherInstructionInformation(
|
||||||
if (op.has_scalar_op()) {
|
if (op.has_scalar_op()) {
|
||||||
const auto& opcode_info =
|
const auto& opcode_info =
|
||||||
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
|
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
|
||||||
|
early_z_allowed_ &= !opcode_info.disable_early_z;
|
||||||
if (opcode_info.argument_count == 1 && op.src_is_temp(3) &&
|
if (opcode_info.argument_count == 1 && op.src_is_temp(3) &&
|
||||||
(op.src_reg(3) & 0x40)) {
|
(op.src_reg(3) & 0x40)) {
|
||||||
uses_register_dynamic_addressing_ = true;
|
uses_register_dynamic_addressing_ = true;
|
||||||
|
@ -345,6 +350,7 @@ void ShaderTranslator::GatherInstructionInformation(
|
||||||
writes_color_targets_[op.scalar_dest()] = true;
|
writes_color_targets_[op.scalar_dest()] = true;
|
||||||
} else if (op.scalar_dest() == 61) {
|
} else if (op.scalar_dest() == 61) {
|
||||||
writes_depth_ = true;
|
writes_depth_ = true;
|
||||||
|
early_z_allowed_ = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (memexport_alloc_count_ > 0 &&
|
if (memexport_alloc_count_ > 0 &&
|
||||||
|
@ -1030,91 +1036,91 @@ void ShaderTranslator::ParseTextureFetchInstruction(
|
||||||
|
|
||||||
const ShaderTranslator::AluOpcodeInfo
|
const ShaderTranslator::AluOpcodeInfo
|
||||||
ShaderTranslator::alu_vector_opcode_infos_[0x20] = {
|
ShaderTranslator::alu_vector_opcode_infos_[0x20] = {
|
||||||
{"add", 2, 4}, // 0
|
{"add", 2, 4, false}, // 0
|
||||||
{"mul", 2, 4}, // 1
|
{"mul", 2, 4, false}, // 1
|
||||||
{"max", 2, 4}, // 2
|
{"max", 2, 4, false}, // 2
|
||||||
{"min", 2, 4}, // 3
|
{"min", 2, 4, false}, // 3
|
||||||
{"seq", 2, 4}, // 4
|
{"seq", 2, 4, false}, // 4
|
||||||
{"sgt", 2, 4}, // 5
|
{"sgt", 2, 4, false}, // 5
|
||||||
{"sge", 2, 4}, // 6
|
{"sge", 2, 4, false}, // 6
|
||||||
{"sne", 2, 4}, // 7
|
{"sne", 2, 4, false}, // 7
|
||||||
{"frc", 1, 4}, // 8
|
{"frc", 1, 4, false}, // 8
|
||||||
{"trunc", 1, 4}, // 9
|
{"trunc", 1, 4, false}, // 9
|
||||||
{"floor", 1, 4}, // 10
|
{"floor", 1, 4, false}, // 10
|
||||||
{"mad", 3, 4}, // 11
|
{"mad", 3, 4, false}, // 11
|
||||||
{"cndeq", 3, 4}, // 12
|
{"cndeq", 3, 4, false}, // 12
|
||||||
{"cndge", 3, 4}, // 13
|
{"cndge", 3, 4, false}, // 13
|
||||||
{"cndgt", 3, 4}, // 14
|
{"cndgt", 3, 4, false}, // 14
|
||||||
{"dp4", 2, 4}, // 15
|
{"dp4", 2, 4, false}, // 15
|
||||||
{"dp3", 2, 4}, // 16
|
{"dp3", 2, 4, false}, // 16
|
||||||
{"dp2add", 3, 4}, // 17
|
{"dp2add", 3, 4, false}, // 17
|
||||||
{"cube", 2, 4}, // 18
|
{"cube", 2, 4, false}, // 18
|
||||||
{"max4", 1, 4}, // 19
|
{"max4", 1, 4, false}, // 19
|
||||||
{"setp_eq_push", 2, 4}, // 20
|
{"setp_eq_push", 2, 4, false}, // 20
|
||||||
{"setp_ne_push", 2, 4}, // 21
|
{"setp_ne_push", 2, 4, false}, // 21
|
||||||
{"setp_gt_push", 2, 4}, // 22
|
{"setp_gt_push", 2, 4, false}, // 22
|
||||||
{"setp_ge_push", 2, 4}, // 23
|
{"setp_ge_push", 2, 4, false}, // 23
|
||||||
{"kill_eq", 2, 4}, // 24
|
{"kill_eq", 2, 4, true}, // 24
|
||||||
{"kill_gt", 2, 4}, // 25
|
{"kill_gt", 2, 4, true}, // 25
|
||||||
{"kill_ge", 2, 4}, // 26
|
{"kill_ge", 2, 4, true}, // 26
|
||||||
{"kill_ne", 2, 4}, // 27
|
{"kill_ne", 2, 4, true}, // 27
|
||||||
{"dst", 2, 4}, // 28
|
{"dst", 2, 4, false}, // 28
|
||||||
{"maxa", 2, 4}, // 29
|
{"maxa", 2, 4, false}, // 29
|
||||||
};
|
};
|
||||||
|
|
||||||
const ShaderTranslator::AluOpcodeInfo
|
const ShaderTranslator::AluOpcodeInfo
|
||||||
ShaderTranslator::alu_scalar_opcode_infos_[0x40] = {
|
ShaderTranslator::alu_scalar_opcode_infos_[0x40] = {
|
||||||
{"adds", 1, 2}, // 0
|
{"adds", 1, 2, false}, // 0
|
||||||
{"adds_prev", 1, 1}, // 1
|
{"adds_prev", 1, 1, false}, // 1
|
||||||
{"muls", 1, 2}, // 2
|
{"muls", 1, 2, false}, // 2
|
||||||
{"muls_prev", 1, 1}, // 3
|
{"muls_prev", 1, 1, false}, // 3
|
||||||
{"muls_prev2", 1, 2}, // 4
|
{"muls_prev2", 1, 2, false}, // 4
|
||||||
{"maxs", 1, 2}, // 5
|
{"maxs", 1, 2, false}, // 5
|
||||||
{"mins", 1, 2}, // 6
|
{"mins", 1, 2, false}, // 6
|
||||||
{"seqs", 1, 1}, // 7
|
{"seqs", 1, 1, false}, // 7
|
||||||
{"sgts", 1, 1}, // 8
|
{"sgts", 1, 1, false}, // 8
|
||||||
{"sges", 1, 1}, // 9
|
{"sges", 1, 1, false}, // 9
|
||||||
{"snes", 1, 1}, // 10
|
{"snes", 1, 1, false}, // 10
|
||||||
{"frcs", 1, 1}, // 11
|
{"frcs", 1, 1, false}, // 11
|
||||||
{"truncs", 1, 1}, // 12
|
{"truncs", 1, 1, false}, // 12
|
||||||
{"floors", 1, 1}, // 13
|
{"floors", 1, 1, false}, // 13
|
||||||
{"exp", 1, 1}, // 14
|
{"exp", 1, 1, false}, // 14
|
||||||
{"logc", 1, 1}, // 15
|
{"logc", 1, 1, false}, // 15
|
||||||
{"log", 1, 1}, // 16
|
{"log", 1, 1, false}, // 16
|
||||||
{"rcpc", 1, 1}, // 17
|
{"rcpc", 1, 1, false}, // 17
|
||||||
{"rcpf", 1, 1}, // 18
|
{"rcpf", 1, 1, false}, // 18
|
||||||
{"rcp", 1, 1}, // 19
|
{"rcp", 1, 1, false}, // 19
|
||||||
{"rsqc", 1, 1}, // 20
|
{"rsqc", 1, 1, false}, // 20
|
||||||
{"rsqf", 1, 1}, // 21
|
{"rsqf", 1, 1, false}, // 21
|
||||||
{"rsq", 1, 1}, // 22
|
{"rsq", 1, 1, false}, // 22
|
||||||
{"maxas", 1, 2}, // 23
|
{"maxas", 1, 2, false}, // 23
|
||||||
{"maxasf", 1, 2}, // 24
|
{"maxasf", 1, 2, false}, // 24
|
||||||
{"subs", 1, 2}, // 25
|
{"subs", 1, 2, false}, // 25
|
||||||
{"subs_prev", 1, 1}, // 26
|
{"subs_prev", 1, 1, false}, // 26
|
||||||
{"setp_eq", 1, 1}, // 27
|
{"setp_eq", 1, 1, false}, // 27
|
||||||
{"setp_ne", 1, 1}, // 28
|
{"setp_ne", 1, 1, false}, // 28
|
||||||
{"setp_gt", 1, 1}, // 29
|
{"setp_gt", 1, 1, false}, // 29
|
||||||
{"setp_ge", 1, 1}, // 30
|
{"setp_ge", 1, 1, false}, // 30
|
||||||
{"setp_inv", 1, 1}, // 31
|
{"setp_inv", 1, 1, false}, // 31
|
||||||
{"setp_pop", 1, 1}, // 32
|
{"setp_pop", 1, 1, false}, // 32
|
||||||
{"setp_clr", 1, 1}, // 33
|
{"setp_clr", 1, 1, false}, // 33
|
||||||
{"setp_rstr", 1, 1}, // 34
|
{"setp_rstr", 1, 1, false}, // 34
|
||||||
{"kills_eq", 1, 1}, // 35
|
{"kills_eq", 1, 1, true}, // 35
|
||||||
{"kills_gt", 1, 1}, // 36
|
{"kills_gt", 1, 1, true}, // 36
|
||||||
{"kills_ge", 1, 1}, // 37
|
{"kills_ge", 1, 1, true}, // 37
|
||||||
{"kills_ne", 1, 1}, // 38
|
{"kills_ne", 1, 1, true}, // 38
|
||||||
{"kills_one", 1, 1}, // 39
|
{"kills_one", 1, 1, true}, // 39
|
||||||
{"sqrt", 1, 1}, // 40
|
{"sqrt", 1, 1, false}, // 40
|
||||||
{"UNKNOWN", 0, 0}, // 41
|
{"UNKNOWN", 0, 0, false}, // 41
|
||||||
{"mulsc", 2, 1}, // 42
|
{"mulsc", 2, 1, false}, // 42
|
||||||
{"mulsc", 2, 1}, // 43
|
{"mulsc", 2, 1, false}, // 43
|
||||||
{"addsc", 2, 1}, // 44
|
{"addsc", 2, 1, false}, // 44
|
||||||
{"addsc", 2, 1}, // 45
|
{"addsc", 2, 1, false}, // 45
|
||||||
{"subsc", 2, 1}, // 46
|
{"subsc", 2, 1, false}, // 46
|
||||||
{"subsc", 2, 1}, // 47
|
{"subsc", 2, 1, false}, // 47
|
||||||
{"sin", 1, 1}, // 48
|
{"sin", 1, 1, false}, // 48
|
||||||
{"cos", 1, 1}, // 49
|
{"cos", 1, 1, false}, // 49
|
||||||
{"retain_prev", 1, 1}, // 50
|
{"retain_prev", 1, 1, false}, // 50
|
||||||
};
|
};
|
||||||
|
|
||||||
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
|
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
|
||||||
|
|
|
@ -58,6 +58,9 @@ class ShaderTranslator {
|
||||||
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
||||||
// True if the current shader overrides the pixel depth.
|
// True if the current shader overrides the pixel depth.
|
||||||
bool writes_depth() const { return writes_depth_; }
|
bool writes_depth() const { return writes_depth_; }
|
||||||
|
// True if the pixel shader can potentially have early depth/stencil testing
|
||||||
|
// enabled, provided alpha testing is disabled.
|
||||||
|
bool early_z_allowed() const { return early_z_allowed_; }
|
||||||
// A list of all vertex bindings, populated before translation occurs.
|
// A list of all vertex bindings, populated before translation occurs.
|
||||||
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
||||||
return vertex_bindings_;
|
return vertex_bindings_;
|
||||||
|
@ -160,6 +163,7 @@ class ShaderTranslator {
|
||||||
const char* name;
|
const char* name;
|
||||||
size_t argument_count;
|
size_t argument_count;
|
||||||
int src_swizzle_component_count;
|
int src_swizzle_component_count;
|
||||||
|
bool disable_early_z;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool TranslateInternal(Shader* shader);
|
bool TranslateInternal(Shader* shader);
|
||||||
|
@ -245,6 +249,7 @@ class ShaderTranslator {
|
||||||
bool uses_register_dynamic_addressing_ = false;
|
bool uses_register_dynamic_addressing_ = false;
|
||||||
bool writes_color_targets_[4] = {false, false, false, false};
|
bool writes_color_targets_[4] = {false, false, false, false};
|
||||||
bool writes_depth_ = false;
|
bool writes_depth_ = false;
|
||||||
|
bool early_z_allowed_ = true;
|
||||||
|
|
||||||
uint32_t memexport_alloc_count_ = 0;
|
uint32_t memexport_alloc_count_ = 0;
|
||||||
// For register allocation in implementations - what was used after each
|
// For register allocation in implementations - what was used after each
|
||||||
|
|
Loading…
Reference in New Issue