[GPU] Dynamic r# count via shader modifications + refactoring

Triang3l 2020-12-19 16:14:54 +03:00
parent b106aa88e6
commit e6fa0ad139
30 changed files with 1684 additions and 1716 deletions
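The central change: the number of dynamically addressable r# registers, taken from SQ_PROGRAM_CNTL, is now carried in the shader modification bits (which are widened from 32 to 64 bits across the shader classes below), so each distinct register count gets its own translation instead of being a translation-time parameter. The diff does not show the actual DxbcShaderTranslator::Modification layout; the sketch below is only a hypothetical illustration of the idea, with made-up field packing.

// Hypothetical sketch only - field names echo the diff, the packing is assumed.
struct ModificationSketch {
  // r# count from SQ_PROGRAM_CNTL vs_num_reg / ps_num_reg, folded into the
  // translation key so GetOrCreateTranslation() keys on it.
  uint32_t dynamic_addressable_register_count;
  // Remaining stage-specific bits: host vertex shader type, depth/stencil
  // mode for pixel shaders, and so on.
  uint32_t other_bits;
  uint64_t value() const {
    return (uint64_t(other_bits) << 32) | dynamic_addressable_register_count;
  }
};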

View File

@ -99,14 +99,11 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
}
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
const Shader* pixel_shader) const {
if (pixel_shader == nullptr) {
return 0;
}
uint32_t shader_writes_color_targets) const {
auto& regs = *register_file_;
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
for (uint32_t i = 0; i < 4; ++i) {
if (!pixel_shader->writes_color_target(i)) {
if (!(shader_writes_color_targets & (1 << i))) {
color_mask &= ~(0xF << (i * 4));
}
}
@ -167,14 +164,18 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
: D3D12_SHADER_VISIBILITY_VERTEX;
uint32_t texture_count_vertex, sampler_count_vertex;
vertex_shader->GetTextureBindings(texture_count_vertex);
vertex_shader->GetSamplerBindings(sampler_count_vertex);
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
if (pixel_shader != nullptr) {
pixel_shader->GetTextureBindings(texture_count_pixel);
pixel_shader->GetSamplerBindings(sampler_count_pixel);
}
uint32_t texture_count_vertex =
uint32_t(vertex_shader->GetTextureBindingsAfterTranslation().size());
uint32_t sampler_count_vertex =
uint32_t(vertex_shader->GetSamplerBindingsAfterTranslation().size());
uint32_t texture_count_pixel =
pixel_shader
? uint32_t(pixel_shader->GetTextureBindingsAfterTranslation().size())
: 0;
uint32_t sampler_count_pixel =
pixel_shader
? uint32_t(pixel_shader->GetSamplerBindingsAfterTranslation().size())
: 0;
// Probably better to put the pixel shader textures/samplers in the lower bits
// because they change more often.
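The comment above refers to how the texture/sampler counts are packed into the root signature lookup key, which is outside this hunk. A plausible sketch of such packing, with purely illustrative bit widths (not the layout GetRootSignature actually uses):

// Illustrative only: pixel-stage counts in the low bits since they change the
// most between draws; exact widths and extra flags are assumptions.
uint32_t MakeRootSignatureKeySketch(uint32_t texture_count_pixel,
                                    uint32_t sampler_count_pixel,
                                    uint32_t texture_count_vertex,
                                    uint32_t sampler_count_vertex,
                                    bool tessellated) {
  uint32_t key = texture_count_pixel;           // bits 0-6
  key |= sampler_count_pixel << 7;              // bits 7-13
  key |= texture_count_vertex << 14;            // bits 14-20
  key |= sampler_count_vertex << 21;            // bits 21-27
  key |= uint32_t(tessellated) << 28;           // vertex vs. domain visibility
  return key;
}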
@ -383,33 +384,26 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
RootBindfulExtraParameterIndices& indices_out) {
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
if (pixel_shader != nullptr) {
pixel_shader->GetTextureBindings(texture_count_pixel);
pixel_shader->GetSamplerBindings(sampler_count_pixel);
}
uint32_t texture_count_vertex, sampler_count_vertex;
vertex_shader->GetTextureBindings(texture_count_vertex);
vertex_shader->GetSamplerBindings(sampler_count_vertex);
uint32_t index = kRootParameter_Bindful_Count_Base;
if (texture_count_pixel != 0) {
if (pixel_shader &&
!pixel_shader->GetTextureBindingsAfterTranslation().empty()) {
indices_out.textures_pixel = index++;
} else {
indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable;
}
if (sampler_count_pixel != 0) {
if (pixel_shader &&
!pixel_shader->GetSamplerBindingsAfterTranslation().empty()) {
indices_out.samplers_pixel = index++;
} else {
indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable;
}
if (texture_count_vertex != 0) {
if (!vertex_shader->GetTextureBindingsAfterTranslation().empty()) {
indices_out.textures_vertex = index++;
} else {
indices_out.textures_vertex =
RootBindfulExtraParameterIndices::kUnavailable;
}
if (sampler_count_vertex != 0) {
if (!vertex_shader->GetSamplerBindingsAfterTranslation().empty()) {
indices_out.samplers_vertex = index++;
} else {
indices_out.samplers_vertex =
@ -1839,10 +1833,14 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
// Need a pixel shader in normal color mode.
return false;
}
// Gather shader ucode information to get the color mask (needed by the render
// target cache) and the memexport configuration, and also to get the current
// shader modification bits.
DxbcShaderTranslator::Modification vertex_shader_modification;
DxbcShaderTranslator::Modification pixel_shader_modification;
if (!pipeline_cache_->GetCurrentShaderModifications(
vertex_shader_modification, pixel_shader_modification)) {
if (!pipeline_cache_->AnalyzeShaderUcodeAndGetCurrentModifications(
vertex_shader, pixel_shader, vertex_shader_modification,
pixel_shader_modification)) {
return false;
}
D3D12Shader::D3D12Translation* vertex_shader_translation =
@ -1854,13 +1852,6 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
pixel_shader->GetOrCreateTranslation(
pixel_shader_modification.value))
: nullptr;
// Translate the shaders now to get memexport configuration and color mask,
// which is needed by the render target cache, and also to get used textures
// and samplers.
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation,
pixel_shader_translation)) {
return false;
}
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
Shader::HostVertexShaderType::kVertex;
@ -1889,7 +1880,10 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
BeginSubmission(true);
// Set up the render targets - this may bind pipelines.
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
uint32_t pixel_shader_writes_color_targets =
pixel_shader ? pixel_shader->writes_color_targets() : 0;
if (!render_target_cache_->UpdateRenderTargets(
pixel_shader_writes_color_targets)) {
return false;
}
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
@ -1958,13 +1952,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0;
}
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
texture_cache_->RequestTextures(used_texture_mask);
// Create the pipeline if needed and bind it.
// Translate the shaders and create the pipeline if needed.
void* pipeline_handle;
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
@ -1974,6 +1962,17 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
pipeline_render_targets, &pipeline_handle, &root_signature)) {
return false;
}
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMaskAfterTranslation() |
(pixel_shader != nullptr
? pixel_shader->GetUsedTextureMaskAfterTranslation()
: 0);
texture_cache_->RequestTextures(used_texture_mask);
// Bind the pipeline after configuring it and doing everything that may bind
// other pipelines.
if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_handle));
@ -2026,7 +2025,9 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
memexport_used, primitive_polygonal, line_loop_closing_index,
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask,
GetCurrentColorMask(pixel_shader), pipeline_render_targets);
pixel_shader ? GetCurrentColorMask(pixel_shader->writes_color_targets())
: 0,
pipeline_render_targets);
// Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
@ -2089,9 +2090,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
MemExportRange memexport_ranges[512];
uint32_t memexport_range_count = 0;
if (memexport_used_vertex) {
const std::vector<uint32_t>& memexport_stream_constants_vertex =
vertex_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_vertex) {
for (uint32_t constant_index :
vertex_shader->memexport_stream_constants()) {
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
if (memexport_stream.index_count == 0) {
@ -2132,9 +2132,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
}
}
if (memexport_used_pixel) {
const std::vector<uint32_t>& memexport_stream_constants_pixel =
pixel_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_pixel) {
for (uint32_t constant_index : pixel_shader->memexport_stream_constants()) {
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
if (memexport_stream.index_count == 0) {
@ -3588,20 +3586,21 @@ bool D3D12CommandProcessor::UpdateBindings(
vertex_shader->GetTextureBindingLayoutUserUID();
size_t sampler_layout_uid_vertex =
vertex_shader->GetSamplerBindingLayoutUserUID();
uint32_t texture_count_vertex, sampler_count_vertex;
const D3D12Shader::TextureBinding* textures_vertex =
vertex_shader->GetTextureBindings(texture_count_vertex);
const D3D12Shader::SamplerBinding* samplers_vertex =
vertex_shader->GetSamplerBindings(sampler_count_vertex);
const std::vector<D3D12Shader::TextureBinding>& textures_vertex =
vertex_shader->GetTextureBindingsAfterTranslation();
const std::vector<D3D12Shader::SamplerBinding>& samplers_vertex =
vertex_shader->GetSamplerBindingsAfterTranslation();
size_t texture_count_vertex = textures_vertex.size();
size_t sampler_count_vertex = samplers_vertex.size();
if (sampler_count_vertex) {
if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
bindful_samplers_written_vertex_ = false;
}
current_samplers_vertex_.resize(std::max(current_samplers_vertex_.size(),
size_t(sampler_count_vertex)));
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
current_samplers_vertex_.resize(
std::max(current_samplers_vertex_.size(), sampler_count_vertex));
for (size_t i = 0; i < sampler_count_vertex; ++i) {
TextureCache::SamplerParameters parameters =
texture_cache_->GetSamplerParameters(samplers_vertex[i]);
if (current_samplers_vertex_[i] != parameters) {
@ -3615,14 +3614,16 @@ bool D3D12CommandProcessor::UpdateBindings(
// Get textures and samplers used by the pixel shader, check if the last used
// samplers are compatible and update them.
size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
uint32_t texture_count_pixel, sampler_count_pixel;
const D3D12Shader::TextureBinding* textures_pixel;
const D3D12Shader::SamplerBinding* samplers_pixel;
const std::vector<D3D12Shader::TextureBinding>* textures_pixel;
const std::vector<D3D12Shader::SamplerBinding>* samplers_pixel;
size_t texture_count_pixel, sampler_count_pixel;
if (pixel_shader != nullptr) {
texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
textures_pixel = pixel_shader->GetTextureBindings(texture_count_pixel);
samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel);
textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
texture_count_pixel = textures_pixel->size();
samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
sampler_count_pixel = samplers_pixel->size();
if (sampler_count_pixel) {
if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
@ -3633,7 +3634,7 @@ bool D3D12CommandProcessor::UpdateBindings(
size_t(sampler_count_pixel)));
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
TextureCache::SamplerParameters parameters =
texture_cache_->GetSamplerParameters(samplers_pixel[i]);
texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
if (current_samplers_pixel_[i] != parameters) {
current_samplers_pixel_[i] = parameters;
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
@ -3663,7 +3664,7 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
(current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex))) {
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
}
@ -3671,7 +3672,7 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
(current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel))) {
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
}
@ -3804,15 +3805,14 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t* descriptor_indices =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_,
std::max(texture_count_vertex + sampler_count_vertex,
uint32_t(1)) *
std::max(texture_count_vertex + sampler_count_vertex, size_t(1)) *
sizeof(uint32_t),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
&cbuffer_binding_descriptor_indices_vertex_.address));
if (!descriptor_indices) {
return false;
}
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
for (size_t i = 0; i < texture_count_vertex; ++i) {
const D3D12Shader::TextureBinding& texture = textures_vertex[i];
descriptor_indices[texture.bindless_descriptor_index] =
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
@ -3824,11 +3824,11 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_vertex_.size(),
size_t(texture_count_vertex)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex);
}
// Current samplers have already been updated.
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
for (size_t i = 0; i < sampler_count_vertex; ++i) {
descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
current_sampler_bindless_indices_vertex_[i];
}
@ -3841,15 +3841,15 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t* descriptor_indices =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_,
std::max(texture_count_pixel + sampler_count_pixel, uint32_t(1)) *
std::max(texture_count_pixel + sampler_count_pixel, size_t(1)) *
sizeof(uint32_t),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
&cbuffer_binding_descriptor_indices_pixel_.address));
if (!descriptor_indices) {
return false;
}
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
const D3D12Shader::TextureBinding& texture = textures_pixel[i];
for (size_t i = 0; i < texture_count_pixel; ++i) {
const D3D12Shader::TextureBinding& texture = (*textures_pixel)[i];
descriptor_indices[texture.bindless_descriptor_index] =
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
uint32_t(SystemBindlessView::kUnboundedSRVsStart);
@ -3860,12 +3860,12 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_pixel_.size(),
size_t(texture_count_pixel)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel);
}
// Current samplers have already been updated.
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
descriptor_indices[samplers_pixel[i].bindless_descriptor_index] =
for (size_t i = 0; i < sampler_count_pixel; ++i) {
descriptor_indices[(*samplers_pixel)[i].bindless_descriptor_index] =
current_sampler_bindless_indices_pixel_[i];
}
cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
@ -3884,14 +3884,14 @@ bool D3D12CommandProcessor::UpdateBindings(
(!bindful_textures_written_vertex_ ||
current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex));
bool write_textures_pixel =
texture_count_pixel &&
(!bindful_textures_written_pixel_ ||
current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel));
bool write_samplers_vertex =
sampler_count_vertex && !bindful_samplers_written_vertex_;
@ -3899,7 +3899,7 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_count_pixel && !bindful_samplers_written_pixel_;
// Allocate the descriptors.
uint32_t view_count_partial_update = 0;
size_t view_count_partial_update = 0;
if (write_textures_vertex) {
view_count_partial_update += texture_count_vertex;
}
@ -3907,7 +3907,7 @@ bool D3D12CommandProcessor::UpdateBindings(
view_count_partial_update += texture_count_pixel;
}
// All the constants + shared memory SRV and UAV + textures.
uint32_t view_count_full_update =
size_t view_count_full_update =
2 + texture_count_vertex + texture_count_pixel;
if (edram_rov_used_) {
// + EDRAM UAV.
@ -3917,14 +3917,14 @@ bool D3D12CommandProcessor::UpdateBindings(
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
uint32_t descriptor_size_view = provider.GetViewDescriptorSize();
uint64_t view_heap_index = RequestViewBindfulDescriptors(
draw_view_bindful_heap_index_, view_count_partial_update,
view_count_full_update, view_cpu_handle, view_gpu_handle);
draw_view_bindful_heap_index_, uint32_t(view_count_partial_update),
uint32_t(view_count_full_update), view_cpu_handle, view_gpu_handle);
if (view_heap_index ==
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
XELOGE("Failed to allocate view descriptors");
return false;
}
uint32_t sampler_count_partial_update = 0;
size_t sampler_count_partial_update = 0;
if (write_samplers_vertex) {
sampler_count_partial_update += sampler_count_vertex;
}
@ -3938,9 +3938,10 @@ bool D3D12CommandProcessor::UpdateBindings(
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
if (sampler_count_vertex != 0 || sampler_count_pixel != 0) {
sampler_heap_index = RequestSamplerBindfulDescriptors(
draw_sampler_bindful_heap_index_, sampler_count_partial_update,
sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle,
sampler_gpu_handle);
draw_sampler_bindful_heap_index_,
uint32_t(sampler_count_partial_update),
uint32_t(sampler_count_vertex + sampler_count_pixel),
sampler_cpu_handle, sampler_gpu_handle);
if (sampler_heap_index ==
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
XELOGE("Failed to allocate sampler descriptors");
@ -3985,7 +3986,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.textures_vertex !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_textures_vertex_ = view_gpu_handle;
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
for (size_t i = 0; i < texture_count_vertex; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i],
view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
@ -3996,7 +3997,7 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_vertex_.size(),
size_t(texture_count_vertex)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex);
bindful_textures_written_vertex_ = true;
current_graphics_root_up_to_date_ &=
@ -4006,8 +4007,8 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.textures_pixel !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_textures_pixel_ = view_gpu_handle;
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV(textures_pixel[i],
for (size_t i = 0; i < texture_count_pixel; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV((*textures_pixel)[i],
view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
@ -4016,7 +4017,7 @@ bool D3D12CommandProcessor::UpdateBindings(
current_texture_srv_keys_pixel_.resize(std::max(
current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel);
bindful_textures_written_pixel_ = true;
current_graphics_root_up_to_date_ &=
@ -4026,7 +4027,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.samplers_vertex !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_samplers_vertex_ = sampler_gpu_handle;
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
for (size_t i = 0; i < sampler_count_vertex; ++i) {
texture_cache_->WriteSampler(current_samplers_vertex_[i],
sampler_cpu_handle);
sampler_cpu_handle.ptr += descriptor_size_sampler;
@ -4041,7 +4042,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.samplers_pixel !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_samplers_pixel_ = sampler_gpu_handle;
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
for (size_t i = 0; i < sampler_count_pixel; ++i) {
texture_cache_->WriteSampler(current_samplers_pixel_[i],
sampler_cpu_handle);
sampler_cpu_handle.ptr += descriptor_size_sampler;

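Taken together, the d3d12_command_processor.cc changes reorder the draw setup: ucode analysis and modification selection happen up front, the render target cache receives only the writes_color_targets() bitmask, translation is deferred into ConfigurePipeline, and the pipeline is bound last because both the render target update and the texture requests may bind other pipelines. A condensed outline of the resulting order (paraphrased from the hunks above, not verbatim code):

// Condensed IssueDraw ordering after this commit (error handling omitted):
// 1. pipeline_cache_->AnalyzeShaderUcodeAndGetCurrentModifications(
//        vertex_shader, pixel_shader, vs_mod, ps_mod);
// 2. vs_translation = vertex_shader->GetOrCreateTranslation(vs_mod.value);
//    ps_translation = pixel_shader
//                         ? pixel_shader->GetOrCreateTranslation(ps_mod.value)
//                         : nullptr;
// 3. render_target_cache_->UpdateRenderTargets(
//        pixel_shader ? pixel_shader->writes_color_targets() : 0);
// 4. pipeline_cache_->ConfigurePipeline(...);   // translates shaders if needed
// 5. texture_cache_->RequestTextures(used_texture_mask);
// 6. deferred_command_list_.SetPipelineStateHandle(pipeline_handle);  // bind last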
View File

@ -89,7 +89,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// there are 4 render targets bound with the same EDRAM base (clearly not
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
// conflict with each other.
uint32_t GetCurrentColorMask(const Shader* pixel_shader) const;
uint32_t GetCurrentColorMask(uint32_t shader_writes_color_targets) const;
void PushTransitionBarrier(
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,

View File

@ -99,7 +99,7 @@ void D3D12Shader::D3D12Translation::DisassembleDxbcAndDxil(
}
Shader::Translation* D3D12Shader::CreateTranslationInstance(
uint32_t modification) {
uint64_t modification) {
return new D3D12Translation(*this, modification);
}

View File

@ -23,7 +23,7 @@ class D3D12Shader : public DxbcShader {
public:
class D3D12Translation : public DxbcTranslation {
public:
D3D12Translation(D3D12Shader& shader, uint32_t modification)
D3D12Translation(D3D12Shader& shader, uint64_t modification)
: DxbcTranslation(shader, modification) {}
void DisassembleDxbcAndDxil(const ui::d3d12::D3D12Provider& provider,
@ -60,7 +60,7 @@ class D3D12Shader : public DxbcShader {
}
protected:
Translation* CreateTranslationInstance(uint32_t modification) override;
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;

View File

@ -18,6 +18,7 @@
#include <mutex>
#include <set>
#include <utility>
#include <vector>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
@ -29,6 +30,7 @@
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/base/string.h"
#include "xenia/base/string_buffer.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/gpu_flags.h"
@ -265,7 +267,7 @@ void PipelineCache::InitializeShaderStorage(
// collect used shader modifications to translate.
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
// <Shader hash, modification bits>.
std::set<std::pair<uint64_t, uint32_t>> shader_translations_needed;
std::set<std::pair<uint64_t, uint64_t>> shader_translations_needed;
auto pipeline_storage_file_path =
shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
@ -292,7 +294,6 @@ void PipelineCache::InitializeShaderStorage(
uint32_t magic;
uint32_t magic_api;
uint32_t version_swapped;
uint32_t device_features;
} pipeline_storage_file_header;
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_) &&
@ -331,6 +332,9 @@ void PipelineCache::InitializeShaderStorage(
pipeline_stored_descriptions.resize(i);
break;
}
// TODO(Triang3l): On Vulkan, skip pipelines requiring unsupported
// device features (to keep the cache files mostly shareable across
// devices).
// Mark the shader modifications as needed for translation.
shader_translations_needed.emplace(
pipeline_stored_description.description.vertex_shader_hash,
@ -391,14 +395,14 @@ void PipelineCache::InitializeShaderStorage(
// Threads overlapping file reading.
std::mutex shaders_translation_thread_mutex;
std::condition_variable shaders_translation_thread_cond;
std::deque<std::pair<ShaderStoredHeader, D3D12Shader::D3D12Translation*>>
shaders_to_translate;
std::deque<D3D12Shader*> shaders_to_translate;
size_t shader_translation_threads_busy = 0;
bool shader_translation_threads_shutdown = false;
std::mutex shaders_failed_to_translate_mutex;
std::vector<D3D12Shader::D3D12Translation*> shaders_failed_to_translate;
auto shader_translation_thread_function = [&]() {
auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
StringBuffer ucode_disasm_buffer;
DxbcShaderTranslator translator(
provider.GetAdapterVendorID(), bindless_resources_used_,
edram_rov_used_, provider.GetGraphicsAnalysis() != nullptr);
@ -416,8 +420,7 @@ void PipelineCache::InitializeShaderStorage(
IID_PPV_ARGS(&dxc_compiler));
}
for (;;) {
std::pair<ShaderStoredHeader, D3D12Shader::D3D12Translation*>
shader_to_translate;
D3D12Shader* shader_to_translate;
for (;;) {
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
if (shaders_to_translate.empty()) {
@ -432,12 +435,29 @@ void PipelineCache::InitializeShaderStorage(
++shader_translation_threads_busy;
break;
}
assert_not_null(shader_to_translate.second);
if (!TranslateShader(translator, *shader_to_translate.second,
shader_to_translate.first.sq_program_cntl,
dxbc_converter, dxc_utils, dxc_compiler)) {
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
shaders_failed_to_translate.push_back(shader_to_translate.second);
shader_to_translate->AnalyzeUcode(ucode_disasm_buffer);
// Translate each needed modification on this thread after performing
// modification-independent analysis of the whole shader.
uint64_t ucode_data_hash = shader_to_translate->ucode_data_hash();
for (auto modification_it = shader_translations_needed.lower_bound(
std::make_pair(ucode_data_hash, uint64_t(0)));
modification_it != shader_translations_needed.end() &&
modification_it->first == ucode_data_hash;
++modification_it) {
D3D12Shader::D3D12Translation* translation =
static_cast<D3D12Shader::D3D12Translation*>(
shader_to_translate->GetOrCreateTranslation(
modification_it->second));
// Only try (and delete in case of failure) if it's a new translation.
// If it's a shader the game has encountered previously, whose translation
// has failed, and the shader storage is loaded later, keep it as it is
// rather than trying to translate it again.
if (!translation->is_translated() &&
!TranslateAnalyzedShader(translator, *translation, dxbc_converter,
dxc_utils, dxc_compiler)) {
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
shaders_failed_to_translate.push_back(translation);
}
}
{
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
@ -477,59 +497,41 @@ void PipelineCache::InitializeShaderStorage(
break;
}
shader_storage_valid_bytes += sizeof(shader_header) + ucode_byte_count;
// Only add the shader if needed.
auto modification_it = shader_translations_needed.lower_bound(
std::make_pair(ucode_data_hash, uint32_t(0)));
if (modification_it == shader_translations_needed.end() ||
modification_it->first != ucode_data_hash) {
continue;
}
D3D12Shader* shader =
LoadShader(shader_header.type, ucode_dwords.data(),
shader_header.ucode_dword_count, ucode_data_hash);
if (shader->ucode_storage_index() == shader_storage_index_) {
// Appeared twice in this file for some reason - skip, otherwise race
// condition will be caused by translating twice in parallel.
continue;
}
// Loaded from the current storage - don't write again.
shader->set_ucode_storage_index(shader_storage_index_);
// Translate all the needed modifications.
for (; modification_it != shader_translations_needed.end() &&
modification_it->first == ucode_data_hash;
++modification_it) {
bool translation_is_new;
D3D12Shader::D3D12Translation* translation =
static_cast<D3D12Shader::D3D12Translation*>(
shader->GetOrCreateTranslation(modification_it->second,
&translation_is_new));
if (!translation_is_new) {
// Already added - usually shaders aren't added without the intention
// of translating them imminently, so don't do additional checks to
// actually ensure that translation happens right now (they would
// cause a race condition with shaders currently queued for
// translation).
continue;
}
// Create new threads if the currently existing threads can't keep up
// with file reading, but not more than the number of logical processors
// minus one.
size_t shader_translation_threads_needed;
{
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_needed =
std::min(shader_translation_threads_busy +
shaders_to_translate.size() + size_t(1),
logical_processor_count - size_t(1));
}
while (shader_translation_threads.size() <
shader_translation_threads_needed) {
shader_translation_threads.push_back(xe::threading::Thread::Create(
{}, shader_translation_thread_function));
shader_translation_threads.back()->set_name("Shader Translation");
}
{
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shaders_to_translate.emplace_back(shader_header, translation);
}
shaders_translation_thread_cond.notify_one();
++shaders_translated;
// Create new threads if the currently existing threads can't keep up
// with file reading, but not more than the number of logical processors
// minus one.
size_t shader_translation_threads_needed;
{
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_needed =
std::min(shader_translation_threads_busy +
shaders_to_translate.size() + size_t(1),
logical_processor_count - size_t(1));
}
while (shader_translation_threads.size() <
shader_translation_threads_needed) {
shader_translation_threads.push_back(xe::threading::Thread::Create(
{}, shader_translation_thread_function));
shader_translation_threads.back()->set_name("Shader Translation");
}
// Request ucode information gathering and translation of all the needed
// shaders.
{
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shaders_to_translate.push_back(shader);
}
shaders_translation_thread_cond.notify_one();
++shaders_translated;
}
if (!shader_translation_threads.empty()) {
{
@ -593,6 +595,8 @@ void PipelineCache::InitializeShaderStorage(
pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description =
pipeline_stored_description.description;
// TODO(Triang3l): On Vulkan, skip pipelines requiring unsupported device
// features (to keep the cache files mostly shareable across devices).
// Skip already known pipelines - those have already been enqueued.
auto found_range =
pipelines_.equal_range(pipeline_stored_description.description_hash);
@ -621,6 +625,7 @@ void PipelineCache::InitializeShaderStorage(
vertex_shader->GetTranslation(
pipeline_description.vertex_shader_modification));
if (!pipeline_runtime_description.vertex_shader ||
!pipeline_runtime_description.vertex_shader->is_translated() ||
!pipeline_runtime_description.vertex_shader->is_valid()) {
continue;
}
@ -637,6 +642,7 @@ void PipelineCache::InitializeShaderStorage(
pixel_shader->GetTranslation(
pipeline_description.pixel_shader_modification));
if (!pipeline_runtime_description.pixel_shader ||
!pipeline_runtime_description.pixel_shader->is_translated() ||
!pipeline_runtime_description.pixel_shader->is_valid()) {
continue;
}
@ -730,9 +736,6 @@ void PipelineCache::InitializeShaderStorage(
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_storage_file_header.version_swapped =
pipeline_storage_version_swapped;
// Reserved for future (for Vulkan) - host device features affecting legal
// pipeline descriptions.
pipeline_storage_file_header.device_features = 0;
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_);
}
@ -854,52 +857,68 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
return shader;
}
bool PipelineCache::GetCurrentShaderModifications(
bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
DxbcShaderTranslator::Modification& pixel_shader_modification_out) const {
DxbcShaderTranslator::Modification& pixel_shader_modification_out) {
Shader::HostVertexShaderType host_vertex_shader_type =
GetCurrentHostVertexShaderTypeIfValid();
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
return false;
}
const auto& regs = register_file_;
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
vertex_shader->AnalyzeUcode(ucode_disasm_buffer_);
vertex_shader_modification_out = DxbcShaderTranslator::Modification(
shader_translator_->GetDefaultModification(xenos::ShaderType::kVertex,
host_vertex_shader_type));
DxbcShaderTranslator::Modification pixel_shader_modification(
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel));
if (!edram_rov_used_) {
const auto& regs = register_file_;
using DepthStencilMode =
DxbcShaderTranslator::Modification::DepthStencilMode;
if ((depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputTruncating ||
depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputRounding) &&
regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
xenos::DepthRenderTargetFormat::kD24FS8) {
pixel_shader_modification.depth_stencil_mode =
depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputTruncating
? DepthStencilMode::kFloat24Truncating
: DepthStencilMode::kFloat24Rounding;
} else {
// Hint to enable early depth/stencil writing if possible - whether it
// will actually take effect depends on the shader itself, it's not known
// before translation.
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
if ((!rb_colorcontrol.alpha_test_enable ||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
!rb_colorcontrol.alpha_to_mask_enable) {
shader_translator_->GetDefaultModification(
xenos::ShaderType::kVertex,
vertex_shader->GetDynamicAddressableRegisterCount(
sq_program_cntl.vs_num_reg),
host_vertex_shader_type));
if (pixel_shader) {
pixel_shader->AnalyzeUcode(ucode_disasm_buffer_);
DxbcShaderTranslator::Modification pixel_shader_modification(
shader_translator_->GetDefaultModification(
xenos::ShaderType::kPixel,
pixel_shader->GetDynamicAddressableRegisterCount(
sq_program_cntl.ps_num_reg)));
if (!edram_rov_used_) {
using DepthStencilMode =
DxbcShaderTranslator::Modification::DepthStencilMode;
if ((depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputTruncating ||
depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputRounding) &&
regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
xenos::DepthRenderTargetFormat::kD24FS8) {
pixel_shader_modification.depth_stencil_mode =
DepthStencilMode::kEarlyHint;
depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputTruncating
? DepthStencilMode::kFloat24Truncating
: DepthStencilMode::kFloat24Rounding;
} else {
pixel_shader_modification.depth_stencil_mode =
DepthStencilMode::kNoModifiers;
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
if (pixel_shader->implicit_early_z_write_allowed() &&
(!rb_colorcontrol.alpha_test_enable ||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
!rb_colorcontrol.alpha_to_mask_enable) {
pixel_shader_modification.depth_stencil_mode =
DepthStencilMode::kEarlyHint;
} else {
pixel_shader_modification.depth_stencil_mode =
DepthStencilMode::kNoModifiers;
}
}
}
pixel_shader_modification_out = pixel_shader_modification;
} else {
pixel_shader_modification_out = DxbcShaderTranslator::Modification(
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel,
0));
}
pixel_shader_modification_out = pixel_shader_modification;
return true;
}
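The register count fed into GetDefaultModification comes from SQ_PROGRAM_CNTL's vs_num_reg / ps_num_reg fields via GetDynamicAddressableRegisterCount, which is not part of this diff. A plausible reading, assuming the hardware field stores the register count minus one and that a shader which never indexes registers dynamically needs no indexable space:

// Hypothetical sketch of Shader::GetDynamicAddressableRegisterCount; the real
// implementation is not shown in this commit.
uint32_t GetDynamicAddressableRegisterCountSketch(
    bool uses_register_dynamic_addressing, uint32_t program_cntl_num_reg) {
  // num_reg is assumed to hold (register count - 1); report zero when nothing
  // is addressed dynamically so the translator allocates no x0 array.
  return uses_register_dynamic_addressing ? program_cntl_num_reg + 1 : 0;
}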
@ -979,62 +998,6 @@ PipelineCache::GetCurrentHostVertexShaderTypeIfValid() const {
return Shader::HostVertexShaderType(-1);
}
bool PipelineCache::EnsureShadersTranslated(
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader) {
const auto& regs = register_file_;
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
// Edge flags are not supported yet (because polygon primitives are not).
assert_true(sq_program_cntl.vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
sq_program_cntl.vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
assert_false(sq_program_cntl.gen_index_vtx);
if (!vertex_shader->is_translated()) {
if (!TranslateShader(*shader_translator_, *vertex_shader, sq_program_cntl,
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
XELOGE("Failed to translate the vertex shader!");
return false;
}
if (shader_storage_file_ && vertex_shader->shader().ucode_storage_index() !=
shader_storage_index_) {
vertex_shader->shader().set_ucode_storage_index(shader_storage_index_);
assert_not_null(storage_write_thread_);
shader_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_shader_queue_.push_back(
std::make_pair(&vertex_shader->shader(), sq_program_cntl));
}
storage_write_request_cond_.notify_all();
}
}
if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl,
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
XELOGE("Failed to translate the pixel shader!");
return false;
}
if (shader_storage_file_ &&
pixel_shader->shader().ucode_storage_index() != shader_storage_index_) {
pixel_shader->shader().set_ucode_storage_index(shader_storage_index_);
assert_not_null(storage_write_thread_);
shader_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_shader_queue_.push_back(
std::make_pair(&pixel_shader->shader(), sq_program_cntl));
}
storage_write_request_cond_.notify_all();
}
}
return true;
}
bool PipelineCache::ConfigurePipeline(
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
@ -1078,8 +1041,50 @@ bool PipelineCache::ConfigurePipeline(
}
}
if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) {
return false;
// Ensure shaders are translated.
// Edge flags are not supported yet (because polygon primitives are not).
assert_true(register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
assert_false(register_file_.Get<reg::SQ_PROGRAM_CNTL>().gen_index_vtx);
if (!vertex_shader->is_translated()) {
vertex_shader->shader().AnalyzeUcode(ucode_disasm_buffer_);
if (!TranslateAnalyzedShader(*shader_translator_, *vertex_shader,
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
XELOGE("Failed to translate the vertex shader!");
return false;
}
if (shader_storage_file_ && vertex_shader->shader().ucode_storage_index() !=
shader_storage_index_) {
vertex_shader->shader().set_ucode_storage_index(shader_storage_index_);
assert_not_null(storage_write_thread_);
shader_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_shader_queue_.push_back(&vertex_shader->shader());
}
storage_write_request_cond_.notify_all();
}
}
if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
pixel_shader->shader().AnalyzeUcode(ucode_disasm_buffer_);
if (!TranslateAnalyzedShader(*shader_translator_, *pixel_shader,
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
XELOGE("Failed to translate the pixel shader!");
return false;
}
if (shader_storage_file_ &&
pixel_shader->shader().ucode_storage_index() != shader_storage_index_) {
pixel_shader->shader().set_ucode_storage_index(shader_storage_index_);
assert_not_null(storage_write_thread_);
shader_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_shader_queue_.push_back(&pixel_shader->shader());
}
storage_write_request_cond_.notify_all();
}
}
Pipeline* new_pipeline = new Pipeline;
@ -1121,17 +1126,15 @@ bool PipelineCache::ConfigurePipeline(
return true;
}
bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
D3D12Shader::D3D12Translation& translation,
reg::SQ_PROGRAM_CNTL cntl,
IDxbcConverter* dxbc_converter,
IDxcUtils* dxc_utils,
IDxcCompiler* dxc_compiler) {
bool PipelineCache::TranslateAnalyzedShader(
DxbcShaderTranslator& translator,
D3D12Shader::D3D12Translation& translation, IDxbcConverter* dxbc_converter,
IDxcUtils* dxc_utils, IDxcCompiler* dxc_compiler) {
D3D12Shader& shader = static_cast<D3D12Shader&>(translation.shader());
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (!translator.Translate(translation, cntl)) {
if (!translator.TranslateAnalyzedShader(translation)) {
XELOGE("Shader {:016X} translation failed; marking as ignored",
shader.ucode_data_hash());
return false;
@ -1171,21 +1174,21 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
// Set up texture and sampler binding layouts.
if (shader.EnterBindingLayoutUserUIDSetup()) {
uint32_t texture_binding_count;
const D3D12Shader::TextureBinding* texture_bindings =
shader.GetTextureBindings(texture_binding_count);
uint32_t sampler_binding_count;
const D3D12Shader::SamplerBinding* sampler_bindings =
shader.GetSamplerBindings(sampler_binding_count);
const std::vector<D3D12Shader::TextureBinding>& texture_bindings =
shader.GetTextureBindingsAfterTranslation();
uint32_t texture_binding_count = uint32_t(texture_bindings.size());
const std::vector<D3D12Shader::SamplerBinding>& sampler_bindings =
shader.GetSamplerBindingsAfterTranslation();
uint32_t sampler_binding_count = uint32_t(sampler_bindings.size());
assert_false(bindless_resources_used_ &&
texture_binding_count + sampler_binding_count >
D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4);
size_t texture_binding_layout_bytes =
texture_binding_count * sizeof(*texture_bindings);
texture_binding_count * sizeof(*texture_bindings.data());
uint64_t texture_binding_layout_hash = 0;
if (texture_binding_count) {
texture_binding_layout_hash =
XXH3_64bits(texture_bindings, texture_binding_layout_bytes);
XXH3_64bits(texture_bindings.data(), texture_binding_layout_bytes);
}
uint32_t bindless_sampler_count =
bindless_resources_used_ ? sampler_binding_count : 0;
@ -1223,7 +1226,8 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
if (it->second.vector_span_length == texture_binding_count &&
!std::memcmp(texture_binding_layouts_.data() +
it->second.vector_span_offset,
texture_bindings, texture_binding_layout_bytes)) {
texture_bindings.data(),
texture_binding_layout_bytes)) {
texture_binding_layout_uid = it->second.uid;
break;
}
@ -1242,7 +1246,7 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
texture_binding_count);
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_bindings.data(), texture_binding_layout_bytes);
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
new_uid);
}
@ -1576,8 +1580,10 @@ bool PipelineCache::GetCurrentStateDescription(
// Render targets and blending state. 32 because of 0x1F mask, for safety
// (all unknown to zero).
uint32_t color_mask = command_processor_.GetCurrentColorMask(
pixel_shader ? &pixel_shader->shader() : nullptr);
uint32_t color_mask =
pixel_shader ? command_processor_.GetCurrentColorMask(
pixel_shader->shader().writes_color_targets())
: 0;
static const PipelineBlendFactor kBlendFactorMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -2038,7 +2044,7 @@ void PipelineCache::StorageWriteThread() {
fflush(pipeline_storage_file_);
}
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
const Shader* shader = nullptr;
PipelineStoredDescription pipeline_description;
bool write_pipeline = false;
{
@ -2047,7 +2053,7 @@ void PipelineCache::StorageWriteThread() {
return;
}
if (!storage_write_shader_queue_.empty()) {
shader_pair = storage_write_shader_queue_.front();
shader = storage_write_shader_queue_.front();
storage_write_shader_queue_.pop_front();
} else if (storage_write_flush_shaders_) {
storage_write_flush_shaders_ = false;
@ -2063,18 +2069,16 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_pipelines_ = false;
flush_pipelines = true;
}
if (!shader_pair.first && !write_pipeline) {
if (!shader && !write_pipeline) {
storage_write_request_cond_.wait(lock);
continue;
}
}
const Shader* shader = shader_pair.first;
if (shader) {
shader_header.ucode_data_hash = shader->ucode_data_hash();
shader_header.ucode_dword_count = shader->ucode_dword_count();
shader_header.type = shader->type();
shader_header.sq_program_cntl = shader_pair.second;
assert_not_null(shader_storage_file_);
fwrite(&shader_header, sizeof(shader_header), 1, shader_storage_file_);
if (shader_header.ucode_dword_count) {

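The storage loader's worker threads now receive whole D3D12Shader pointers: each thread runs AnalyzeUcode once and then translates every modification recorded for that shader's hash, instead of receiving (stored header, translation) pairs. The underlying pattern is an on-demand producer/consumer pool; below is a reduced, self-contained sketch of that pattern using the same deque, condition variable and busy counter structure as the diff, not the actual loader code:

#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>

// Minimal sketch of the on-demand translation worker loop: the file reader
// pushes work items and spawns additional threads only while the existing ones
// cannot keep up; workers drain the queue and exit once shutdown is requested.
struct TranslationPoolSketch {
  std::mutex mutex;
  std::condition_variable cond;
  std::deque<int> work;   // stands in for queued D3D12Shader* entries
  size_t busy = 0;        // lets the reader decide whether to add more threads
  bool shutdown = false;

  void Worker() {
    for (;;) {
      int item;
      {
        std::unique_lock<std::mutex> lock(mutex);
        cond.wait(lock, [this] { return shutdown || !work.empty(); });
        if (work.empty()) {
          return;  // shutdown requested and nothing left to translate
        }
        item = work.front();
        work.pop_front();
        ++busy;
      }
      // ... AnalyzeUcode, then translate every modification needed for item ...
      (void)item;
      {
        std::lock_guard<std::mutex> lock(mutex);
        --busy;
      }
      cond.notify_all();
    }
  }
};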
View File

@ -23,6 +23,7 @@
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_buffer.h"
#include "xenia/base/threading.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
@ -63,15 +64,12 @@ class PipelineCache {
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
const uint32_t* host_address, uint32_t dword_count);
// Retrieves the shader modifications for the current state, and returns
// whether they are valid.
bool GetCurrentShaderModifications(
// Ensures microcode is analyzed, retrieves the shader modifications for the
// current state, and returns whether they are valid.
bool AnalyzeShaderUcodeAndGetCurrentModifications(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
DxbcShaderTranslator::Modification& pixel_shader_modification_out) const;
// Translates shaders if needed, also making shader info up to date.
bool EnsureShadersTranslated(D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader);
DxbcShaderTranslator::Modification& pixel_shader_modification_out);
bool ConfigurePipeline(
D3D12Shader::D3D12Translation* vertex_shader,
@ -93,9 +91,7 @@ class PipelineCache {
uint32_t ucode_dword_count : 31;
xenos::ShaderType type : 1;
reg::SQ_PROGRAM_CNTL sq_program_cntl;
static constexpr uint32_t kVersion = 0x20201207;
static constexpr uint32_t kVersion = 0x20201219;
});
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
@ -171,10 +167,10 @@ class PipelineCache {
XEPACKEDSTRUCT(PipelineDescription, {
uint64_t vertex_shader_hash;
uint64_t vertex_shader_modification;
// 0 if drawing without a pixel shader.
uint64_t pixel_shader_hash;
uint32_t vertex_shader_modification;
uint32_t pixel_shader_modification;
uint64_t pixel_shader_modification;
int32_t depth_bias;
float depth_bias_slope_scaled;
@ -208,7 +204,7 @@ class PipelineCache {
PipelineRenderTarget render_targets[4];
static constexpr uint32_t kVersion = 0x20201207;
static constexpr uint32_t kVersion = 0x20201219;
});
XEPACKEDSTRUCT(PipelineStoredDescription, {
@ -232,12 +228,11 @@ class PipelineCache {
uint64_t data_hash);
// Can be called from multiple threads.
bool TranslateShader(DxbcShaderTranslator& translator,
D3D12Shader::D3D12Translation& translation,
reg::SQ_PROGRAM_CNTL cntl,
IDxbcConverter* dxbc_converter = nullptr,
IDxcUtils* dxc_utils = nullptr,
IDxcCompiler* dxc_compiler = nullptr);
bool TranslateAnalyzedShader(DxbcShaderTranslator& translator,
D3D12Shader::D3D12Translation& translation,
IDxbcConverter* dxbc_converter = nullptr,
IDxcUtils* dxc_utils = nullptr,
IDxcCompiler* dxc_compiler = nullptr);
bool GetCurrentStateDescription(
D3D12Shader::D3D12Translation* vertex_shader,
@ -257,7 +252,9 @@ class PipelineCache {
flags::DepthFloat24Conversion depth_float24_conversion_;
uint32_t resolution_scale_;
// Reusable shader translator.
// Temporary storage for AnalyzeUcode calls on the processor thread.
StringBuffer ucode_disasm_buffer_;
// Reusable shader translator for the processor thread.
std::unique_ptr<DxbcShaderTranslator> shader_translator_;
// Command processor thread DXIL conversion/disassembly interfaces, if DXIL
@ -332,8 +329,7 @@ class PipelineCache {
std::condition_variable storage_write_request_cond_;
// Storage thread input is protected with storage_write_request_lock_, and the
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<const Shader*> storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipelines_ = false;

View File

@ -535,7 +535,8 @@ void RenderTargetCache::EndFrame() {
FlushAndUnbindRenderTargets();
}
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
bool RenderTargetCache::UpdateRenderTargets(
uint32_t shader_writes_color_targets) {
// There are two kinds of render target binding updates in this implementation
// in case something has been changed - full and partial.
//
@ -635,7 +636,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t edram_bases[5];
uint32_t formats[5];
bool formats_are_64bpp[5];
uint32_t color_mask = command_processor_.GetCurrentColorMask(pixel_shader);
uint32_t color_mask =
command_processor_.GetCurrentColorMask(shader_writes_color_targets);
for (uint32_t i = 0; i < 4; ++i) {
enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
auto color_info = regs.Get<reg::RB_COLOR_INFO>(

View File

@ -269,7 +269,7 @@ class RenderTargetCache {
void EndFrame();
// Called in the beginning of a draw call - may bind pipelines and change the
// view descriptor heap.
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
bool UpdateRenderTargets(uint32_t shader_writes_color_targets);
// Returns the host-to-guest mappings and host formats of currently bound
// render targets for pipeline creation and remapping in shaders. They are
// consecutive, and format DXGI_FORMAT_UNKNOWN terminates the list. Depth

View File

@ -1334,8 +1334,8 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
bool TextureCache::AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
size_t host_shader_binding_count) const {
for (size_t i = 0; i < host_shader_binding_count; ++i) {
const TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[host_shader_bindings[i].fetch_constant];
@ -1350,8 +1350,8 @@ bool TextureCache::AreActiveTextureSRVKeysUpToDate(
void TextureCache::WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
size_t host_shader_binding_count) const {
for (size_t i = 0; i < host_shader_binding_count; ++i) {
TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[host_shader_bindings[i].fetch_constant];

View File

@ -196,14 +196,14 @@ class TextureCache {
bool AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
size_t host_shader_binding_count) const;
// Exports the current binding data to texture SRV keys so they can be stored
// for checking whether subsequent draw calls can keep using the same
// bindings. Write host_shader_binding_count keys.
void WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
size_t host_shader_binding_count) const;
// Returns the post-swizzle signedness of a currently bound texture (must be
// called after RequestTextures).
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {

View File

@ -19,7 +19,7 @@ DxbcShader::DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
Shader::Translation* DxbcShader::CreateTranslationInstance(
uint32_t modification) {
uint64_t modification) {
return new DxbcTranslation(*this, modification);
}

View File

@ -10,6 +10,7 @@
#ifndef XENIA_GPU_DXBC_SHADER_H_
#define XENIA_GPU_DXBC_SHADER_H_
#include <atomic>
#include <vector>
#include "xenia/gpu/dxbc_shader_translator.h"
@ -23,13 +24,17 @@ class DxbcShader : public Shader {
public:
class DxbcTranslation : public Translation {
public:
DxbcTranslation(DxbcShader& shader, uint32_t modification)
DxbcTranslation(DxbcShader& shader, uint64_t modification)
: Translation(shader, modification) {}
};
DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
// Resource bindings are gathered after the successful translation of any
// modification for simplicity of translation (and they don't depend on
// modification bits).
static constexpr uint32_t kMaxTextureBindingIndexBits =
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
static constexpr uint32_t kMaxTextureBindings =
@ -43,11 +48,13 @@ class DxbcShader : public Shader {
bool is_signed;
};
// Safe to hash and compare with memcmp for layout hashing.
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
count_out = uint32_t(texture_bindings_.size());
return texture_bindings_.data();
const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation()
const {
return texture_bindings_;
}
const uint32_t GetUsedTextureMaskAfterTranslation() const {
return used_texture_mask_;
}
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
static constexpr uint32_t kMaxSamplerBindingIndexBits =
DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
@ -61,17 +68,18 @@ class DxbcShader : public Shader {
xenos::TextureFilter mip_filter;
xenos::AnisoFilter aniso_filter;
};
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
count_out = uint32_t(sampler_bindings_.size());
return sampler_bindings_.data();
const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation()
const {
return sampler_bindings_;
}
protected:
Translation* CreateTranslationInstance(uint32_t modification) override;
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
friend class DxbcShaderTranslator;
std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT;
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
uint32_t used_texture_mask_ = 0;

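The getter changes in dxbc_shader.h are what drive the call-site churn in d3d12_command_processor.cc above: the pointer-plus-out-count accessors are replaced with accessors returning the vectors themselves. A short usage sketch of the migration (the loop body is illustrative):

// Old style (removed in this commit):
//   uint32_t texture_count;
//   const D3D12Shader::TextureBinding* textures =
//       shader.GetTextureBindings(texture_count);
// New style:
const std::vector<D3D12Shader::TextureBinding>& textures =
    shader.GetTextureBindingsAfterTranslation();
size_t texture_count = textures.size();
for (const D3D12Shader::TextureBinding& texture : textures) {
  // e.g. texture.fetch_constant, texture.bindless_descriptor_index, ...
}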
View File

@ -10,6 +10,7 @@
#include "xenia/gpu/dxbc_shader_translator.h"
#include <algorithm>
#include <atomic>
#include <cstring>
#include <memory>
@ -78,16 +79,23 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
DxbcShaderTranslator::~DxbcShaderTranslator() = default;
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
Reset(xenos::ShaderType::kPixel);
is_depth_only_pixel_shader_ = true;
StartTranslation();
return std::move(CompleteTranslation());
// TODO(Triang3l): Handle in a nicer way (is_depth_only_pixel_shader_ is a
// leftover from when a Shader object wasn't used during translation).
Shader shader(xenos::ShaderType::kPixel, 0, nullptr, 0);
shader.AnalyzeUcode(instruction_disassembly_buffer_);
Shader::Translation& translation = *shader.GetOrCreateTranslation(0);
TranslateAnalyzedShader(translation);
is_depth_only_pixel_shader_ = false;
return translation.translated_binary();
}
uint32_t DxbcShaderTranslator::GetDefaultModification(
xenos::ShaderType shader_type,
uint64_t DxbcShaderTranslator::GetDefaultModification(
xenos::ShaderType shader_type, uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type) const {
Modification shader_modification;
shader_modification.dynamic_addressable_register_count =
dynamic_addressable_register_count;
switch (shader_type) {
case xenos::ShaderType::kVertex:
shader_modification.host_vertex_shader_type = host_vertex_shader_type;
@ -100,13 +108,11 @@ uint32_t DxbcShaderTranslator::GetDefaultModification(
return shader_modification.value;
}
void DxbcShaderTranslator::Reset(xenos::ShaderType shader_type) {
ShaderTranslator::Reset(shader_type);
void DxbcShaderTranslator::Reset() {
ShaderTranslator::Reset();
shader_code_.clear();
is_depth_only_pixel_shader_ = false;
cbuffer_count_ = 0;
// System constants always used in prologues/epilogues.
cbuffer_index_system_constants_ = cbuffer_count_++;
@ -231,6 +237,10 @@ void DxbcShaderTranslator::DxbcSrc::Write(std::vector<uint32_t>& code,
}
}
uint32_t DxbcShaderTranslator::GetModificationRegisterCount() const {
return GetDxbcShaderModification().dynamic_addressable_register_count;
}
bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
// Xenia crashes on Intel HD Graphics 4000 with switch.
return cvars::dxbc_switch && vendor_id_ != 0x8086;
@ -239,7 +249,8 @@ bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
uint32_t DxbcShaderTranslator::PushSystemTemp(uint32_t zero_mask,
uint32_t count) {
uint32_t register_index = system_temp_count_current_;
if (!uses_register_dynamic_addressing() && !is_depth_only_pixel_shader_) {
if (!is_depth_only_pixel_shader_ &&
!current_shader().uses_register_dynamic_addressing()) {
// Guest shader registers first if they're not in x0. Depth-only pixel
// shader is a special case of the DXBC translator usage, where there are no
// GPRs because there's no shader to translate, and a guest shader is not
@ -327,10 +338,13 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
return;
}
bool uses_register_dynamic_addressing =
current_shader().uses_register_dynamic_addressing();
// Writing the index to X of GPR 0 - either directly if not using indexable
// registers, or via a system temporary register.
uint32_t reg;
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
reg = PushSystemTemp();
} else {
reg = 0;
@ -392,7 +406,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
DxbcOpBreak();
DxbcOpEndSwitch();
if (!uses_register_dynamic_addressing()) {
if (!uses_register_dynamic_addressing) {
// Break register dependency.
DxbcOpMov(swap_temp_dest, DxbcSrc::LF(0.0f));
}
@ -409,7 +423,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
// Convert to float.
DxbcOpIToF(index_dest, index_src);
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
// Store to indexed GPR 0 in x0[0].
DxbcOpMov(DxbcDest::X(0, 0, 0b0001), index_src);
PopSystemTemp();
@ -417,6 +431,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
}
void DxbcShaderTranslator::StartVertexOrDomainShader() {
bool uses_register_dynamic_addressing =
current_shader().uses_register_dynamic_addressing();
// Zero the interpolators.
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i),
@ -438,13 +455,13 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// Copy the domain location to r0.xyz.
// ZYX swizzle according to Call of Duty 3 and Viva Pinata.
in_domain_location_used_ |= 0b0111;
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0111)
: DxbcDest::R(0, 0b0111),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111)
: DxbcDest::R(0, 0b0111),
DxbcSrc::VDomain(0b000110));
if (register_count() >= 2) {
// Copy the control point indices (already swapped and converted to
// float by the host vertex and hull shaders) to r1.xyz.
DxbcDest control_point_index_dest(uses_register_dynamic_addressing()
DxbcDest control_point_index_dest(uses_register_dynamic_addressing
? DxbcDest::X(0, 1)
: DxbcDest::R(1));
in_control_point_index_used_ = true;
@ -465,16 +482,16 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// ZYX swizzle with r1.y == 0, according to the water shader in
// Banjo-Kazooie: Nuts & Bolts.
in_domain_location_used_ |= 0b0111;
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0111)
: DxbcDest::R(0, 0b0111),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111)
: DxbcDest::R(0, 0b0111),
DxbcSrc::VDomain(0b000110));
if (register_count() >= 2) {
// Copy the primitive index to r1.x as a float.
uint32_t primitive_id_temp =
uses_register_dynamic_addressing() ? PushSystemTemp() : 1;
uses_register_dynamic_addressing ? PushSystemTemp() : 1;
in_primitive_id_used_ = true;
DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim());
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
DxbcOpMov(DxbcDest::X(0, 1, 0b0001),
DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX));
// Release primitive_id_temp.
@ -499,9 +516,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
//
// Direct3D 12 passes the coordinates in a consistent order, so can
// just use the identity swizzle.
DxbcOpMov(uses_register_dynamic_addressing()
? DxbcDest::X(0, 1, 0b0010)
: DxbcDest::R(1, 0b0010),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0010)
: DxbcDest::R(1, 0b0010),
DxbcSrc::LF(0.0f));
}
}
@ -512,8 +528,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
if (register_count() >= 1) {
// Copy the domain location to r0.xy.
in_domain_location_used_ |= 0b0011;
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0011)
: DxbcDest::R(0, 0b0011),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0011)
: DxbcDest::R(0, 0b0011),
DxbcSrc::VDomain());
// Control point indices according to the shader from the main menu of
// Defender, which starts from `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz`,
@ -524,14 +540,13 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// r1.z for (1 - r0.x) * r0.y
in_control_point_index_used_ = true;
DxbcOpMov(
uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0100)
: DxbcDest::R(0, 0b0100),
uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0100)
: DxbcDest::R(0, 0b0100),
DxbcSrc::VICP(0, uint32_t(InOutRegister::kDSInControlPointIndex),
DxbcSrc::kXXXX));
if (register_count() >= 2) {
DxbcDest r1_dest(uses_register_dynamic_addressing()
? DxbcDest::X(0, 1)
: DxbcDest::R(1));
DxbcDest r1_dest(uses_register_dynamic_addressing ? DxbcDest::X(0, 1)
: DxbcDest::R(1));
for (uint32_t i = 0; i < 3; ++i) {
DxbcOpMov(
r1_dest.Mask(1 << i),
@ -549,15 +564,15 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// Copy the domain location to r0.yz.
// XY swizzle according to the ground shader in Viva Pinata.
in_domain_location_used_ |= 0b0011;
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0110)
: DxbcDest::R(0, 0b0110),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0110)
: DxbcDest::R(0, 0b0110),
DxbcSrc::VDomain(0b010000));
// Copy the primitive index to r0.x as a float.
uint32_t primitive_id_temp =
uses_register_dynamic_addressing() ? PushSystemTemp() : 0;
uses_register_dynamic_addressing ? PushSystemTemp() : 0;
in_primitive_id_used_ = true;
DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim());
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
DxbcOpMov(DxbcDest::X(0, 0, 0b0001),
DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX));
// Release primitive_id_temp.
@ -578,9 +593,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
//
// Direct3D 12 passes the coordinates in a consistent order, so can
// just use the identity swizzle.
DxbcOpMov(uses_register_dynamic_addressing()
? DxbcDest::X(0, 1, 0b0001)
: DxbcDest::R(1, 0b0001),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0001)
: DxbcDest::R(1, 0b0001),
DxbcSrc::LF(0.0f));
}
}
@ -611,7 +625,10 @@ void DxbcShaderTranslator::StartPixelShader() {
return;
}
if (!edram_rov_used_ && writes_depth()) {
bool uses_register_dynamic_addressing =
current_shader().uses_register_dynamic_addressing();
if (!edram_rov_used_ && current_shader().writes_depth()) {
// Initialize the depth output if used, which must be written to regardless
// of the taken execution path.
DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f));
@ -623,7 +640,7 @@ void DxbcShaderTranslator::StartPixelShader() {
// Copy interpolants to GPRs.
if (edram_rov_used_) {
uint32_t centroid_temp =
uses_register_dynamic_addressing() ? PushSystemTemp() : UINT32_MAX;
uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX;
system_constants_used_ |= 1ull
<< kSysConst_InterpolatorSamplingPattern_Index;
DxbcSrc sampling_pattern_src(
@ -635,7 +652,7 @@ void DxbcShaderTranslator::StartPixelShader() {
// With GPR dynamic addressing, first evaluate to centroid_temp r#, then
// store to the x#.
uint32_t centroid_register =
uses_register_dynamic_addressing() ? centroid_temp : i;
uses_register_dynamic_addressing ? centroid_temp : i;
// Check if the input needs to be interpolated at center (if the bit is
// set).
DxbcOpAnd(DxbcDest::R(centroid_register, 0b0001), sampling_pattern_src,
@ -643,8 +660,8 @@ void DxbcShaderTranslator::StartPixelShader() {
DxbcOpIf(bool(xenos::SampleLocation::kCenter),
DxbcSrc::R(centroid_register, DxbcSrc::kXXXX));
// At center.
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
DxbcOpElse();
// At centroid. Not really important that 2x MSAA is emulated using
@ -653,7 +670,7 @@ void DxbcShaderTranslator::StartPixelShader() {
DxbcOpEvalCentroid(
DxbcDest::R(centroid_register),
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
DxbcOpMov(DxbcDest::X(0, i), DxbcSrc::R(centroid_register));
}
DxbcOpEndIf();
@ -665,8 +682,8 @@ void DxbcShaderTranslator::StartPixelShader() {
// SSAA instead of MSAA without ROV - everything is interpolated at
// samples, can't extrapolate.
for (uint32_t i = 0; i < interpolator_count; ++i) {
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
}
}
@ -781,7 +798,7 @@ void DxbcShaderTranslator::StartPixelShader() {
}
// Write ps_param_gen to the specified GPR.
DxbcSrc param_gen_src(DxbcSrc::R(param_gen_temp));
if (uses_register_dynamic_addressing()) {
if (uses_register_dynamic_addressing) {
// Copy the GPR number to r# for relative addressing.
uint32_t param_gen_copy_temp = PushSystemTemp();
DxbcOpMov(DxbcDest::R(param_gen_copy_temp, 0b0001),
@ -863,10 +880,12 @@ void DxbcShaderTranslator::StartTranslation() {
// by the guest code, so initialize because assumptions can't be made
// about the integrity of the guest code.
system_temp_depth_stencil_ =
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
PushSystemTemp(current_shader().writes_depth() ? 0b0001 : 0b1111);
}
uint32_t shader_writes_color_targets =
current_shader().writes_color_targets();
for (uint32_t i = 0; i < 4; ++i) {
if (writes_color_target(i)) {
if (shader_writes_color_targets & (1 << i)) {
system_temps_color_[i] = PushSystemTemp(0b1111);
}
}
@ -879,8 +898,8 @@ void DxbcShaderTranslator::StartTranslation() {
std::memset(system_temps_memexport_data_, 0xFF,
sizeof(system_temps_memexport_data_));
system_temp_memexport_written_ = UINT32_MAX;
const uint8_t* memexports_written = memexport_eM_written();
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
const uint8_t* memexports_written = current_shader().memexport_eM_written();
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
uint32_t memexport_alloc_written = memexports_written[i];
if (memexport_alloc_written == 0) {
continue;
@ -915,8 +934,9 @@ void DxbcShaderTranslator::StartTranslation() {
// references them after only initializing them conditionally.
for (uint32_t i = is_pixel_shader() ? xenos::kMaxInterpolators : 0;
i < register_count(); ++i) {
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcOpMov(current_shader().uses_register_dynamic_addressing()
? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcSrc::LF(0.0f));
}
}
@ -1120,7 +1140,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
ExportToMemory();
// Release memexport temporary registers.
for (int i = kMaxMemExports - 1; i >= 0; --i) {
for (int i = Shader::kMaxMemExports - 1; i >= 0; --i) {
if (system_temps_memexport_address_[i] == UINT32_MAX) {
continue;
}
@ -1154,8 +1174,10 @@ void DxbcShaderTranslator::CompleteShaderCode() {
PopSystemTemp(2);
} else if (is_pixel_shader()) {
// Release system_temps_color_.
uint32_t shader_writes_color_targets =
current_shader().writes_color_targets();
for (int32_t i = 3; i >= 0; --i) {
if (writes_color_target(i)) {
if (shader_writes_color_targets & (1 << i)) {
PopSystemTemp();
}
}
@ -1274,40 +1296,42 @@ std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {
return shader_object_bytes;
}
void DxbcShaderTranslator::PostTranslation(
Shader::Translation& translation, bool setup_shader_post_translation_info) {
if (setup_shader_post_translation_info) {
DxbcShader* dxbc_shader = dynamic_cast<DxbcShader*>(&translation.shader());
if (dxbc_shader) {
dxbc_shader->texture_bindings_.clear();
dxbc_shader->texture_bindings_.reserve(texture_bindings_.size());
dxbc_shader->used_texture_mask_ = 0;
for (const TextureBinding& translator_binding : texture_bindings_) {
DxbcShader::TextureBinding& shader_binding =
dxbc_shader->texture_bindings_.emplace_back();
// For a stable hash.
std::memset(&shader_binding, 0, sizeof(shader_binding));
shader_binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
shader_binding.fetch_constant = translator_binding.fetch_constant;
shader_binding.dimension = translator_binding.dimension;
shader_binding.is_signed = translator_binding.is_signed;
dxbc_shader->used_texture_mask_ |= 1u
<< translator_binding.fetch_constant;
}
dxbc_shader->sampler_bindings_.clear();
dxbc_shader->sampler_bindings_.reserve(sampler_bindings_.size());
for (const SamplerBinding& translator_binding : sampler_bindings_) {
DxbcShader::SamplerBinding& shader_binding =
dxbc_shader->sampler_bindings_.emplace_back();
shader_binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
shader_binding.fetch_constant = translator_binding.fetch_constant;
shader_binding.mag_filter = translator_binding.mag_filter;
shader_binding.min_filter = translator_binding.min_filter;
shader_binding.mip_filter = translator_binding.mip_filter;
shader_binding.aniso_filter = translator_binding.aniso_filter;
}
void DxbcShaderTranslator::PostTranslation() {
Shader::Translation& translation = current_translation();
if (!translation.is_valid()) {
return;
}
DxbcShader* dxbc_shader = dynamic_cast<DxbcShader*>(&translation.shader());
if (dxbc_shader && !dxbc_shader->bindings_setup_entered_.test_and_set(
std::memory_order_relaxed)) {
dxbc_shader->texture_bindings_.clear();
dxbc_shader->texture_bindings_.reserve(texture_bindings_.size());
dxbc_shader->used_texture_mask_ = 0;
for (const TextureBinding& translator_binding : texture_bindings_) {
DxbcShader::TextureBinding& shader_binding =
dxbc_shader->texture_bindings_.emplace_back();
// For a stable hash.
std::memset(&shader_binding, 0, sizeof(shader_binding));
shader_binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
shader_binding.fetch_constant = translator_binding.fetch_constant;
shader_binding.dimension = translator_binding.dimension;
shader_binding.is_signed = translator_binding.is_signed;
dxbc_shader->used_texture_mask_ |= 1u
<< translator_binding.fetch_constant;
}
dxbc_shader->sampler_bindings_.clear();
dxbc_shader->sampler_bindings_.reserve(sampler_bindings_.size());
for (const SamplerBinding& translator_binding : sampler_bindings_) {
DxbcShader::SamplerBinding& shader_binding =
dxbc_shader->sampler_bindings_.emplace_back();
shader_binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
shader_binding.fetch_constant = translator_binding.fetch_constant;
shader_binding.mag_filter = translator_binding.mag_filter;
shader_binding.min_filter = translator_binding.min_filter;
shader_binding.mip_filter = translator_binding.mip_filter;
shader_binding.aniso_filter = translator_binding.aniso_filter;
}
}
}
@ -1373,7 +1397,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
DxbcSrc src(DxbcSrc::LF(0.0f));
switch (operand.storage_source) {
case InstructionStorageSource::kRegister: {
if (uses_register_dynamic_addressing()) {
if (current_shader().uses_register_dynamic_addressing()) {
// Load x#[#] to r# because x#[#] can be used only with mov.
uint32_t temp = PushSystemTemp();
temp_pushed_out = true;
@ -1402,10 +1426,12 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
if (cbuffer_index_float_constants_ == kBindingIndexUnallocated) {
cbuffer_index_float_constants_ = cbuffer_count_++;
}
const Shader::ConstantRegisterMap& constant_register_map =
current_shader().constant_register_map();
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
uint32_t float_constant_index =
constant_register_map().GetPackedFloatConstantIndex(
constant_register_map.GetPackedFloatConstantIndex(
operand.storage_index);
assert_true(float_constant_index != UINT32_MAX);
if (float_constant_index == UINT32_MAX) {
@ -1413,7 +1439,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
}
index.index_ = float_constant_index;
} else {
assert_true(constant_register_map().float_dynamic_addressing);
assert_true(constant_register_map.float_dynamic_addressing);
}
src = DxbcSrc::CB(cbuffer_index_float_constants_,
uint32_t(CbufferRegister::kFloatConstants), index);
@ -1453,7 +1479,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
case InstructionStorageTarget::kNone:
return;
case InstructionStorageTarget::kRegister:
if (uses_register_dynamic_addressing()) {
if (current_shader().uses_register_dynamic_addressing()) {
DxbcIndex register_index(result.storage_index);
switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
@ -1488,7 +1514,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
case InstructionStorageTarget::kExportAddress:
// Validate memexport writes (Halo 3 has some weird invalid ones).
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
memexport_alloc_current_count_ > kMaxMemExports ||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
UINT32_MAX) {
return;
@ -1499,7 +1525,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
case InstructionStorageTarget::kExportData: {
// Validate memexport writes (Halo 3 has some weird invalid ones).
if (memexport_alloc_current_count_ == 0 ||
memexport_alloc_current_count_ > kMaxMemExports ||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
[result.storage_index] == UINT32_MAX) {
return;
@ -1519,7 +1545,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
} break;
case InstructionStorageTarget::kColor:
assert_not_zero(used_write_mask);
assert_true(writes_color_target(result.storage_index));
assert_true(current_shader().writes_color_target(result.storage_index));
dest = DxbcDest::R(system_temps_color_[result.storage_index]);
if (edram_rov_used_) {
// For ROV output, mark that the color has been written to.
@ -1539,7 +1565,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
// Writes X to scalar oDepth or to X of system_temp_depth_stencil_, no
// additional swizzling needed.
assert_true(used_write_mask == 0b0001);
assert_true(writes_depth());
assert_true(current_shader().writes_depth());
if (IsDepthStencilSystemTempUsed()) {
dest = DxbcDest::R(system_temp_depth_stencil_);
} else {
@ -2077,6 +2103,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
uint32_t new_offset;
const Shader::ConstantRegisterMap& constant_register_map =
current_shader().constant_register_map();
// ***************************************************************************
// Header
// ***************************************************************************
@ -2162,7 +2191,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Declaring a 0-sized array may not be safe, so write something valid
// even if they aren't used.
shader_object_.push_back(
std::max(constant_register_map().float_count, uint32_t(1)));
std::max(constant_register_map.float_count, uint32_t(1)));
break;
case RdefTypeIndex::kUint4DescriptorIndexArray:
shader_object_.push_back(std::max(
@ -2278,10 +2307,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Float constants.
uint32_t constant_offset_float = new_offset;
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
assert_not_zero(constant_register_map().float_count);
assert_not_zero(constant_register_map.float_count);
shader_object_.push_back(constant_name_offset_float);
shader_object_.push_back(0);
shader_object_.push_back(constant_register_map().float_count * 4 *
shader_object_.push_back(constant_register_map.float_count * 4 *
sizeof(float));
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
shader_object_.push_back(types_offset +
@ -2405,11 +2434,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// No D3D_SHADER_CBUFFER_FLAGS.
shader_object_.push_back(0);
} else if (i == cbuffer_index_float_constants_) {
assert_not_zero(constant_register_map().float_count);
assert_not_zero(constant_register_map.float_count);
shader_object_.push_back(cbuffer_name_offset_float);
shader_object_.push_back(1);
shader_object_.push_back(constant_offset_float);
shader_object_.push_back(constant_register_map().float_count * 4 *
shader_object_.push_back(constant_register_map.float_count * 4 *
sizeof(float));
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
shader_object_.push_back(0);
@ -3211,7 +3240,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
if (!edram_rov_used_) {
// Color render targets (SV_Target#).
size_t target_position = SIZE_MAX;
if (writes_any_color_target()) {
if (current_shader().writes_color_targets()) {
target_position = shader_object_.size();
shader_object_.resize(shader_object_.size() + 4 * kParameterDwords);
parameter_count += 4;
@ -3233,7 +3262,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
Modification::DepthStencilMode depth_stencil_mode =
GetDxbcShaderModification().depth_stencil_mode;
size_t depth_position = SIZE_MAX;
if (writes_depth() || DSV_IsWritingFloat24Depth()) {
if (current_shader().writes_depth() || DSV_IsWritingFloat24Depth()) {
depth_position = shader_object_.size();
shader_object_.resize(shader_object_.size() + kParameterDwords);
++parameter_count;
@ -3268,7 +3297,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
depth.semantic_name = semantic_offset;
}
const char* depth_semantic_name;
if (!writes_depth() &&
if (!current_shader().writes_depth() &&
GetDxbcShaderModification().depth_stencil_mode ==
Modification::DepthStencilMode::kFloat24Truncating) {
depth_semantic_name = "SV_DepthLessEqual";
@ -3361,7 +3390,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
if (is_pixel_shader() &&
GetDxbcShaderModification().depth_stencil_mode ==
Modification::DepthStencilMode::kEarlyHint &&
!edram_rov_used_ && CanWriteZEarly()) {
!edram_rov_used_ && current_shader().implicit_early_z_write_allowed()) {
global_flags_opcode |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL;
}
shader_object_.push_back(global_flags_opcode);
@ -3369,11 +3398,13 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Constant buffers, from most frequently accessed to least frequently accessed
// (the order is a hint to the driver according to the DXBC header).
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
assert_not_zero(constant_register_map().float_count);
const Shader::ConstantRegisterMap& constant_register_map =
current_shader().constant_register_map();
assert_not_zero(constant_register_map.float_count);
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
constant_register_map().float_dynamic_addressing
constant_register_map.float_dynamic_addressing
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
@ -3382,7 +3413,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(cbuffer_index_float_constants_);
shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
shader_object_.push_back(constant_register_map().float_count);
shader_object_.push_back(constant_register_map.float_count);
shader_object_.push_back(0);
}
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
@ -3715,6 +3746,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
++stat_.dcl_count;
} else if (is_pixel_shader()) {
bool is_writing_float24_depth = DSV_IsWritingFloat24Depth();
bool shader_writes_depth = current_shader().writes_depth();
// Interpolator input.
if (!is_depth_only_pixel_shader_) {
uint32_t interpolator_count =
@ -3766,7 +3798,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
// applicable here) position is mandatory. However, with depth output, on
// the guest, there's only one depth value for the whole pixel.
D3D10_SB_INTERPOLATION_MODE position_interpolation_mode =
is_writing_float24_depth && !writes_depth()
is_writing_float24_depth && !shader_writes_depth
? D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE
: D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE;
shader_object_.push_back(
@ -3806,7 +3838,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
EncodeScalarOperand(D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, 0));
++stat_.dcl_count;
} else {
if (writes_any_color_target()) {
if (current_shader().writes_color_targets()) {
// Color output.
for (uint32_t i = 0; i < 4; ++i) {
shader_object_.push_back(
@ -3819,9 +3851,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
}
// Depth output.
if (is_writing_float24_depth || writes_depth()) {
if (is_writing_float24_depth || shader_writes_depth) {
D3D10_SB_OPERAND_TYPE depth_operand_type;
if (!writes_depth() &&
if (!shader_writes_depth &&
GetDxbcShaderModification().depth_stencil_mode ==
Modification::DepthStencilMode::kFloat24Truncating) {
depth_operand_type = D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL;
@ -3840,7 +3872,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Temporary registers - guest general-purpose registers if not using dynamic
// indexing and Xenia internal registers.
stat_.temp_register_count = system_temp_count_max_;
if (!is_depth_only_pixel_shader_ && !uses_register_dynamic_addressing()) {
if (!is_depth_only_pixel_shader_ &&
!current_shader().uses_register_dynamic_addressing()) {
stat_.temp_register_count += register_count();
}
if (stat_.temp_register_count != 0) {
@ -3851,7 +3884,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
// General-purpose registers if using dynamic indexing (x0).
if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) {
if (!is_depth_only_pixel_shader_ &&
current_shader().uses_register_dynamic_addressing()) {
assert_true(register_count() != 0);
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |

View File

@ -106,13 +106,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
// If anything in this structure is changed in a way not compatible with
// the previous layout, invalidate the pipeline storages by increasing this
// version number (0xYYYYMMDD)!
static constexpr uint32_t kVersion = 0x20201203;
static constexpr uint32_t kVersion = 0x20201219;
enum class DepthStencilMode : uint32_t {
kNoModifiers,
// [earlydepthstencil] - enable if alpha test and alpha to coverage are
// disabled; ignored if anything in the shader blocks early Z writing
// (which is not known before translation, so this will be set anyway).
// disabled; ignored if anything in the shader blocks early Z writing.
kEarlyHint,
// Converting the depth to the closest 32-bit float representable exactly
// as a 20e4 float, to support invariance in cases when the guest
@ -136,15 +135,17 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
struct {
// Both - dynamically indexable register count from SQ_PROGRAM_CNTL.
uint32_t dynamic_addressable_register_count : 8;
// VS - pipeline stage and input configuration.
Shader::HostVertexShaderType host_vertex_shader_type
: Shader::kHostVertexShaderTypeBitCount;
// PS, non-ROV - depth / stencil output mode.
DepthStencilMode depth_stencil_mode : 2;
};
uint32_t value = 0;
uint64_t value = 0;
Modification(uint32_t modification_value = 0) : value(modification_value) {}
Modification(uint64_t modification_value = 0) : value(modification_value) {}
};
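A rough illustration (field values invented) of how the widened 64-bit modification is packed and unpacked through the layout above:

DxbcShaderTranslator::Modification modification;
modification.dynamic_addressable_register_count = 24;
modification.host_vertex_shader_type = Shader::HostVertexShaderType::kVertex;
uint64_t key = modification.value;  // used as the per-translation map key
// The uint64_t constructor restores the packed fields from a stored key.
DxbcShaderTranslator::Modification restored(key);
uint32_t register_count = restored.dynamic_addressable_register_count;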
// Constant buffer bindings in space 0.
@ -467,8 +468,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
float& clamp_alpha_high, uint32_t& keep_mask_low,
uint32_t& keep_mask_high);
uint32_t GetDefaultModification(
uint64_t GetDefaultModification(
xenos::ShaderType shader_type,
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex) const override;
@ -477,12 +479,13 @@ class DxbcShaderTranslator : public ShaderTranslator {
std::vector<uint8_t> CreateDepthOnlyPixelShader();
protected:
void Reset(xenos::ShaderType shader_type) override;
void Reset() override;
uint32_t GetModificationRegisterCount() const override;
void StartTranslation() override;
std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation(Shader::Translation& translation,
bool setup_shader_post_translation_info) override;
void PostTranslation() override;
void ProcessLabel(uint32_t cf_index) override;
@ -2184,7 +2187,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
}
Modification GetDxbcShaderModification() const {
return Modification(modification());
return Modification(current_translation().modification());
}
bool IsDxbcVertexShader() const {
@ -2227,9 +2230,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
bool IsDepthStencilSystemTempUsed() const {
// See system_temp_depth_stencil_ documentation for explanation of cases.
if (edram_rov_used_) {
return writes_depth() || ROV_IsDepthStencilEarly();
return current_shader().writes_depth() || ROV_IsDepthStencilEarly();
}
return writes_depth() && DSV_IsWritingFloat24Depth();
return current_shader().writes_depth() && DSV_IsWritingFloat24Depth();
}
// Whether the current non-ROV pixel shader should convert the depth to 20e4.
bool DSV_IsWritingFloat24Depth() const {
@ -2246,8 +2249,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether it's possible and worth skipping running the translated shader for
// 2x2 quads.
bool ROV_IsDepthStencilEarly() const {
return !is_depth_only_pixel_shader_ && !writes_depth() &&
memexport_stream_constants().empty();
return !is_depth_only_pixel_shader_ && !current_shader().writes_depth() &&
current_shader().memexport_stream_constants().empty();
}
// Converts the depth value to 24-bit (storing the result in bits 0:23 and
// zeros in 24:31, not creating room for stencil - since this may be involved
@ -2467,7 +2470,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Is currently writing the empty depth-only pixel shader, for
// CompleteTranslation.
bool is_depth_only_pixel_shader_;
bool is_depth_only_pixel_shader_ = false;
// Data types used in constant buffers. Listed in dependency order.
enum class RdefTypeIndex {
@ -2604,9 +2607,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
// 4 `alloc export`s per component.
uint32_t system_temp_memexport_written_;
// eA in each `alloc export`, or UINT32_MAX if not used.
uint32_t system_temps_memexport_address_[kMaxMemExports];
uint32_t system_temps_memexport_address_[Shader::kMaxMemExports];
// eM# in each `alloc export`, or UINT32_MAX if not used.
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
uint32_t system_temps_memexport_data_[Shader::kMaxMemExports][5];
// Vector ALU or fetch result/scratch (since Xenos write masks can contain
// swizzles).

View File

@ -136,7 +136,7 @@ void DxbcShaderTranslator::ExportToMemory() {
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kXXXX));
// control_temp.x is now free.
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
uint32_t eA_temp = system_temps_memexport_address_[i];
if (eA_temp == UINT32_MAX) {
// Export not used.

View File

@ -144,7 +144,7 @@ void DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
}
void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
bool color_targets_written = writes_any_color_target();
bool any_color_targets_written = current_shader().writes_color_targets() != 0;
// ***************************************************************************
// Get EDRAM offsets for the pixel:
@ -272,7 +272,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0001),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX));
if (color_targets_written) {
if (any_color_targets_written) {
// Write 32bpp color offset to system_temp_rov_params_.z.
// system_temp_rov_params_.x = X sample 0 position within the depth tile
// system_temp_rov_params_.y = row offset
@ -303,8 +303,8 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
// Release resolution_scale_log2_temp.
PopSystemTemp();
{
DxbcDest offsets_dest(DxbcDest::R(system_temp_rov_params_,
color_targets_written ? 0b0110 : 0b0010));
DxbcDest offsets_dest(DxbcDest::R(
system_temp_rov_params_, any_color_targets_written ? 0b0110 : 0b0010));
// Scale the offsets by the resolution scale.
// system_temp_rov_params_.y = scaled 32bpp depth/stencil first host pixel
// address
@ -329,7 +329,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
// Close the resolution scale conditional.
DxbcOpEndIf();
if (color_targets_written) {
if (any_color_targets_written) {
// Get the 64bpp color offset to system_temp_rov_params_.w.
// TODO(Triang3l): Find some game that aliases 64bpp with 32bpp to emulate
// the real layout.
@ -388,8 +388,6 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
}
void DxbcShaderTranslator::ROV_DepthStencilTest() {
bool depth_stencil_early = ROV_IsDepthStencilEarly();
uint32_t temp = PushSystemTemp();
DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001));
DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX));
@ -413,6 +411,9 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
// temp.x = free
DxbcOpIf(true, temp_x_src);
bool depth_stencil_early = ROV_IsDepthStencilEarly();
bool shader_writes_depth = current_shader().writes_depth();
for (uint32_t i = 0; i < 4; ++i) {
// With early depth/stencil, depth/stencil writing may be deferred to the
// end of the shader to prevent writing in case something (like alpha test,
@ -427,7 +428,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
: temp_x_src);
if (!i) {
if (writes_depth()) {
if (shader_writes_depth) {
// Clamp oDepth to the lower viewport depth bound (depth clamp happens
// after the pixel shader in the pipeline, at least on Direct3D 11 and
// Vulkan, thus applies to the shader's depth output too).
@ -569,7 +570,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
// temp.w = free
DxbcOpIf(true, temp_w_src);
if (writes_depth()) {
if (shader_writes_depth) {
// Copy the 24-bit depth common to all samples to sample_depth_stencil.
// temp.x = shader-generated 24-bit depth
DxbcOpMov(sample_depth_stencil_dest,
@ -1024,7 +1025,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
// temp.z = viewport maximum depth if not writing to oDepth
// temp.w = whether depth/stencil has been modified
DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src);
if (depth_stencil_early && !CanWriteZEarly()) {
if (depth_stencil_early &&
!current_shader().implicit_early_z_write_allowed()) {
// Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always
// need to write late in this shader, as it may do something like
// explicitly killing pixels.
@ -1734,7 +1736,7 @@ void DxbcShaderTranslator::ROV_HandleAlphaBlendFactorCases(
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() {
// Check if alpha to coverage can be done at all in this shader.
if (!writes_color_target(0)) {
if (!current_shader().writes_color_target(0)) {
return;
}
@ -1863,21 +1865,22 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() {
}
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
if (!writes_any_color_target()) {
uint32_t shader_writes_color_targets =
current_shader().writes_color_targets();
if (!shader_writes_color_targets) {
return;
}
// Check if this sample needs to be discarded by alpha to coverage.
CompletePixelShader_WriteToRTVs_AlphaToMask();
// Get the write mask as components, and also apply the exponent bias after
// alpha to coverage because it needs the unbiased alpha from the shader.
uint32_t guest_rt_mask = 0;
uint32_t gamma_temp = PushSystemTemp();
for (uint32_t i = 0; i < 4; ++i) {
if (!writes_color_target(i)) {
if (!(shader_writes_color_targets & (1 << i))) {
continue;
}
guest_rt_mask |= 1 << i;
// Apply the exponent bias after alpha to coverage because it needs the
// unbiased alpha from the shader
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
DxbcOpMul(DxbcDest::R(system_temps_color_[i]),
DxbcSrc::R(system_temps_color_[i]),
@ -1885,16 +1888,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_ColorExpBias_Vec)
.Select(i));
}
// Convert to gamma space - this is incorrect, since it must be done after
// blending on the Xbox 360, but this is just one of many blending issues in
// the RTV path.
uint32_t gamma_temp = PushSystemTemp();
for (uint32_t i = 0; i < 4; ++i) {
if (!(guest_rt_mask & (1 << i))) {
continue;
}
// Convert to gamma space - this is incorrect, since it must be done after
// blending on the Xbox 360, but this is just one of many blending issues in
// the RTV path.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
DxbcOpAnd(DxbcDest::R(gamma_temp, 0b0001),
DxbcSrc::CB(cbuffer_index_system_constants_,
@ -1923,7 +1919,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
// Host RT i, guest RT j.
for (uint32_t i = 0; i < 4; ++i) {
// mask = map.iiii == (0, 1, 2, 3)
DxbcOpIEq(DxbcDest::R(remap_movc_mask_temp, guest_rt_mask),
DxbcOpIEq(DxbcDest::R(remap_movc_mask_temp, shader_writes_color_targets),
DxbcSrc::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_ColorOutputMap_Vec)
@ -1932,7 +1928,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
bool guest_rt_first = true;
for (uint32_t j = 0; j < 4; ++j) {
// If map.i == j, move guest color j to the temporary host color.
if (!(guest_rt_mask & (1 << j))) {
if (!(shader_writes_color_targets & (1 << j))) {
continue;
}
DxbcOpMovC(DxbcDest::R(remap_movc_target_temp),
@ -1954,8 +1950,10 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
return;
}
bool shader_writes_depth = current_shader().writes_depth();
uint32_t temp;
if (writes_depth()) {
if (shader_writes_depth) {
// The depth is already written to system_temp_depth_stencil_.x and clamped
// to 0...1 with NaNs dropped (saturating in StoreResult); yzw are free.
temp = system_temp_depth_stencil_;
@ -1991,8 +1989,8 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
// The smallest denormalized 20e4 number is -34 - should drop 23 mantissa
// bits at -34.
// Anything smaller than 2^-34 becomes 0.
DxbcDest truncate_dest(writes_depth() ? DxbcDest::ODepth()
: DxbcDest::ODepthLE());
DxbcDest truncate_dest(shader_writes_depth ? DxbcDest::ODepth()
: DxbcDest::ODepthLE());
// Check if the number is representable as a float24 after truncation - the
// exponent is at least -34.
DxbcOpUGE(temp_y_dest, temp_x_src, DxbcSrc::LU(0x2E800000));
@ -2076,7 +2074,7 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
temp_y_src);
}
if (!writes_depth()) {
if (!shader_writes_depth) {
// Release temp.
PopSystemTemp();
}
@ -2106,7 +2104,7 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMaskSample(
void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() {
// Check if alpha to coverage can be done at all in this shader.
if (!writes_color_target(0)) {
if (!current_shader().writes_color_target(0)) {
return;
}
@ -2269,8 +2267,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
}
// Write color values.
uint32_t shader_writes_color_targets =
current_shader().writes_color_targets();
for (uint32_t i = 0; i < 4; ++i) {
if (!writes_color_target(i)) {
if (!(shader_writes_color_targets & (1 << i))) {
continue;
}
@ -3156,7 +3156,7 @@ void DxbcShaderTranslator::CompletePixelShader() {
return;
}
if (writes_color_target(0)) {
if (current_shader().writes_color_target(0)) {
// Alpha test.
// X - mask, then masked result (SGPR for loading, VGPR for masking).
// Y - operation result (SGPR for mask operations, VGPR for alpha

View File

@ -97,6 +97,7 @@ union SQ_PROGRAM_CNTL {
// Note from a2xx.xml:
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
// but high bit is set to indicate "0 registers used".
// (Register count = (num_reg & 0x80) ? 0 : (num_reg + 1))
uint32_t vs_num_reg : 8; // +0
uint32_t ps_num_reg : 8; // +8
uint32_t vs_resource : 1; // +16
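A tiny worked example of the decoding described in the new comment (the helper name is made up for illustration):

uint32_t DecodeProgramCntlRegisterCount(uint32_t num_reg) {
  // High bit set means "0 registers used"; otherwise the field stores count - 1.
  return (num_reg & 0x80) ? 0 : (num_reg + 1);
}
// DecodeProgramCntlRegisterCount(0x00) == 1, 0x3F == 64, 0x80 == 0.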

View File

@ -55,7 +55,7 @@ std::filesystem::path Shader::Translation::Dump(
}
path = path /
fmt::format(
"shader_{:016X}_{:08X}.{}.{}", shader().ucode_data_hash(),
"shader_{:016X}_{:016X}.{}.{}", shader().ucode_data_hash(),
modification(), path_prefix,
shader().type() == xenos::ShaderType::kVertex ? "vert" : "frag");
FILE* f = filesystem::OpenFile(path, "wb");
@ -78,7 +78,7 @@ std::filesystem::path Shader::Translation::Dump(
return std::move(path);
}
Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification,
Shader::Translation* Shader::GetOrCreateTranslation(uint64_t modification,
bool* is_new) {
auto it = translations_.find(modification);
if (it != translations_.end()) {
@ -95,7 +95,7 @@ Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification,
return translation;
}
void Shader::DestroyTranslation(uint32_t modification) {
void Shader::DestroyTranslation(uint64_t modification) {
auto it = translations_.find(modification);
if (it == translations_.end()) {
return;
@ -124,7 +124,7 @@ std::filesystem::path Shader::DumpUcodeBinary(
return std::move(path);
}
Shader::Translation* Shader::CreateTranslationInstance(uint32_t modification) {
Shader::Translation* Shader::CreateTranslationInstance(uint64_t modification) {
// Default implementation for simple cases like ucode disassembly.
return new Translation(*this, modification);
}

View File

@ -11,9 +11,9 @@
#define XENIA_GPU_SHADER_H_
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <filesystem>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
@ -593,6 +593,41 @@ struct ParsedAluInstruction {
void Disassemble(StringBuffer* out) const;
};
void ParseControlFlowExec(const ucode::ControlFlowExecInstruction& cf,
uint32_t cf_index, ParsedExecInstruction& instr);
void ParseControlFlowCondExec(const ucode::ControlFlowCondExecInstruction& cf,
uint32_t cf_index, ParsedExecInstruction& instr);
void ParseControlFlowCondExecPred(
const ucode::ControlFlowCondExecPredInstruction& cf, uint32_t cf_index,
ParsedExecInstruction& instr);
void ParseControlFlowLoopStart(const ucode::ControlFlowLoopStartInstruction& cf,
uint32_t cf_index,
ParsedLoopStartInstruction& instr);
void ParseControlFlowLoopEnd(const ucode::ControlFlowLoopEndInstruction& cf,
uint32_t cf_index,
ParsedLoopEndInstruction& instr);
void ParseControlFlowCondCall(const ucode::ControlFlowCondCallInstruction& cf,
uint32_t cf_index, ParsedCallInstruction& instr);
void ParseControlFlowReturn(const ucode::ControlFlowReturnInstruction& cf,
uint32_t cf_index, ParsedReturnInstruction& instr);
void ParseControlFlowCondJmp(const ucode::ControlFlowCondJmpInstruction& cf,
uint32_t cf_index, ParsedJumpInstruction& instr);
void ParseControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf,
uint32_t cf_index, bool is_vertex_shader,
ParsedAllocInstruction& instr);
// Returns whether the fetch is a full one, and the next parsed mini vertex
// fetch should inherit most of its parameters.
bool ParseVertexFetchInstruction(
const ucode::VertexFetchInstruction& op,
const ucode::VertexFetchInstruction& previous_full_op,
ParsedVertexFetchInstruction& instr);
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
ParsedTextureFetchInstruction& instr);
void ParseAluInstruction(const ucode::AluInstruction& op,
xenos::ShaderType shader_type,
ParsedAluInstruction& instr);
class Shader {
public:
// Type of the vertex shader in a D3D11-like rendering pipeline - shader
@ -619,12 +654,8 @@ class Shader {
struct VertexBinding {
struct Attribute {
// Attribute index, 0-based in the entire shader.
int attrib_index;
// Fetch instruction with all parameters.
ParsedVertexFetchInstruction fetch_instr;
// Size of the attribute, in words.
uint32_t size_words;
};
// Index within the vertex binding listing.
@ -691,6 +722,10 @@ class Shader {
}
};
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
// .pdb.
static constexpr uint32_t kMaxMemExports = 16;
class Translation {
public:
virtual ~Translation() {}
@ -698,7 +733,7 @@ class Shader {
Shader& shader() const { return shader_; }
// Translator-specific modification bits.
uint32_t modification() const { return modification_; }
uint64_t modification() const { return modification_; }
// True if the shader was translated and prepared without error.
bool is_valid() const { return is_valid_; }
@ -735,7 +770,7 @@ class Shader {
const char* path_prefix);
protected:
Translation(Shader& shader, uint32_t modification)
Translation(Shader& shader, uint64_t modification)
: shader_(shader), modification_(modification) {}
private:
@ -743,7 +778,7 @@ class Shader {
friend class ShaderTranslator;
Shader& shader_;
uint32_t modification_;
uint64_t modification_;
bool is_valid_ = false;
bool is_translated_ = false;
@ -765,32 +800,23 @@ class Shader {
const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
size_t ucode_dword_count() const { return ucode_data_.size(); }
// Host translations with the specified modification bits. Not thread-safe
// with respect to translation creation/destruction.
const std::unordered_map<uint32_t, Translation*>& translations() const {
return translations_;
}
Translation* GetTranslation(uint32_t modification) const {
auto it = translations_.find(modification);
if (it != translations_.cend()) {
return it->second;
}
return nullptr;
}
Translation* GetOrCreateTranslation(uint32_t modification,
bool* is_new = nullptr);
// For shader storage loading, to remove a modification in case of translation
// failure. Not thread-safe.
void DestroyTranslation(uint32_t modification);
bool is_ucode_analyzed() const { return is_ucode_analyzed_; }
// ucode_disasm_buffer is temporary storage for disassembly (provided
// externally so it won't need to be reallocated for every shader).
void AnalyzeUcode(StringBuffer& ucode_disasm_buffer);
// The following parameters, until the translation, are valid if ucode
// information has been gathered.
// Microcode disassembly in D3D format.
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
// All vertex bindings used in the shader.
// Valid for vertex shaders only.
const std::vector<VertexBinding>& vertex_bindings() const {
return vertex_bindings_;
}
// All texture bindings used in the shader.
// Valid for both vertex and pixel shaders.
const std::vector<TextureBinding>& texture_bindings() const {
return texture_bindings_;
}
@ -800,24 +826,99 @@ class Shader {
return constant_register_map_;
}
// uint5[Shader::kMaxMemExports] - bits indicating which eM# registers have
// been written to after each `alloc export`, for up to Shader::kMaxMemExports
// exports. This will contain zero for certain corrupt exports - for those to
// which a valid eA was not written via a MAD with a stream constant.
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
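For illustration only (mirroring how the DXBC translator consumes these masks earlier in this diff), a sketch of decoding the per-`alloc export` eM# write masks:

const uint8_t* eM_written = shader.memexport_eM_written();
for (uint32_t export_index = 0; export_index < Shader::kMaxMemExports;
     ++export_index) {
  uint8_t eM_mask = eM_written[export_index];
  if (!eM_mask) {
    continue;  // Nothing valid is exported by this `alloc export`.
  }
  for (uint32_t i = 0; i < 5; ++i) {
    if (eM_mask & (1 << i)) {
      // eM0...eM4: register eM<i> is written after this `alloc export`.
    }
  }
}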
// All c# registers used as the addend in MAD operations to eA.
const std::vector<uint32_t>& memexport_stream_constants() const {
const std::set<uint32_t>& memexport_stream_constants() const {
return memexport_stream_constants_;
}
// Returns true if the given color target index [0-3].
bool writes_color_target(uint32_t i) const {
return writes_color_targets_[i];
// Labels that jumps (explicit or from loops) can be done to.
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
// Exclusive upper bound of the indexes of paired control flow instructions
// (each corresponds to 3 dwords).
uint32_t cf_pair_index_bound() const { return cf_pair_index_bound_; }
// Upper bound of temporary registers addressed statically by the shader -
// highest static register address + 1, or 0 if no registers referenced this
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
// can be reproduced by launching Arrival in Halo 3 from the campaign lobby)
// that aren't supposed to cover any pixels use an invalid (zero)
// SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this case
// SQ_PROGRAM_CNTL may contain a number smaller than actually needed by the
// pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
// uses_register_dynamic_addressing is true.
uint32_t register_static_address_bound() const {
return register_static_address_bound_;
}
// True if the shader overrides the pixel depth.
bool writes_depth() const { return writes_depth_; }
// Whether the shader addresses temporary registers dynamically, thus
// SQ_PROGRAM_CNTL should determine the number of registers to use, not only
// register_static_address_bound.
bool uses_register_dynamic_addressing() const {
return uses_register_dynamic_addressing_;
}
// For building shader modification bits (and also for normalization of them),
// returns the amount of temporary registers that need to be allocated
// explicitly - if not using register dynamic addressing, the shader
// translator will use register_static_address_bound directly.
uint32_t GetDynamicAddressableRegisterCount(
uint32_t program_cntl_num_reg) const {
if (!uses_register_dynamic_addressing()) {
return 0;
}
return std::max((program_cntl_num_reg & 0x80)
? uint32_t(0)
: (program_cntl_num_reg + uint32_t(1)),
register_static_address_bound());
}
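A hedged sketch (helper name invented, not code from this commit) of how this count is meant to reach the translator's modification bits, using the same calls this commit adds elsewhere:

uint64_t MakeVertexShaderModification(const ShaderTranslator& translator,
                                      const Shader& shader,
                                      reg::SQ_PROGRAM_CNTL sq_program_cntl) {
  // 0 unless the shader addresses registers dynamically; otherwise the larger
  // of the SQ_PROGRAM_CNTL count and the static register address bound.
  uint32_t dynamic_register_count =
      shader.GetDynamicAddressableRegisterCount(sq_program_cntl.vs_num_reg);
  return translator.GetDefaultModification(
      xenos::ShaderType::kVertex, dynamic_register_count,
      Shader::HostVertexShaderType::kVertex);
}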
// True if the current shader has any `kill` instructions.
bool kills_pixels() const { return kills_pixels_; }
// Microcode disassembly in D3D format.
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
// True if the shader overrides the pixel depth.
bool writes_depth() const { return writes_depth_; }
// Whether the shader can have early depth and stencil writing enabled, unless
// alpha test or alpha to coverage is enabled.
bool implicit_early_z_write_allowed() const {
// TODO(Triang3l): Investigate what happens to memexport when the pixel
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
// depth/stencil.
return !writes_depth() && !kills_pixels() &&
memexport_stream_constants().empty();
}
// Whether each color render target is written to on any execution path.
uint32_t writes_color_targets() const { return writes_color_targets_; }
bool writes_color_target(uint32_t i) const {
return (writes_color_targets() & (uint32_t(1) << i)) != 0;
}
// Host translations with the specified modification bits. Not thread-safe
// with respect to translation creation/destruction.
const std::unordered_map<uint64_t, Translation*>& translations() const {
return translations_;
}
Translation* GetTranslation(uint64_t modification) const {
auto it = translations_.find(modification);
if (it != translations_.cend()) {
return it->second;
}
return nullptr;
}
Translation* GetOrCreateTranslation(uint64_t modification,
bool* is_new = nullptr);
// For shader storage loading, to remove a modification in case of translation
// failure. Not thread-safe.
void DestroyTranslation(uint64_t modification);
// An externally managed identifier of the shader storage the microcode of the
// shader was last written to, or was loaded from, to only write the shader
@ -835,33 +936,68 @@ class Shader {
protected:
friend class ShaderTranslator;
virtual Translation* CreateTranslationInstance(uint32_t modification);
virtual Translation* CreateTranslationInstance(uint64_t modification);
xenos::ShaderType shader_type_;
std::vector<uint32_t> ucode_data_;
uint64_t ucode_data_hash_;
// Modification bits -> translation.
std::unordered_map<uint32_t, Translation*> translations_;
// Whether info needed before translating has been gathered already - may be
// needed to determine which modifications are actually needed and make sense
// (for instance, there may be draws not covering anything and not allocating
// any pixel shader registers in SQ_PROGRAM_CNTL, but still using the pixel
// shader from the previous draw - in this case, every shader that happens to
// be before such draw will need to be translated again with a different
// dynamically addressed register count, which may cause compilation of
// different random pipelines across many random frames, thus causing
// stuttering - normally host pipeline states are deterministically only
// compiled when a new material appears in the game, and having the order of
// draws also matter in such unpredictable way would break this rule; limit
// the effect to shaders with dynamic register addressing only, which are
// extremely rare), also some info needed for drawing is collected during the
// ucode analysis.
bool is_ucode_analyzed_ = false;
// Whether setup of the post-translation parameters (listed below, plus those
// specific to the implementation) has been initiated, by any thread. If
// translation is performed on multiple threads, only one thread must be
// setting this up (other threads would write the same data anyway).
std::atomic_flag post_translation_info_set_up_ = ATOMIC_FLAG_INIT;
// Initialized after the first successful translation (these don't depend on
// the host-side modification bits).
std::string ucode_disassembly_;
std::vector<VertexBinding> vertex_bindings_;
std::vector<TextureBinding> texture_bindings_;
ConstantRegisterMap constant_register_map_ = {0};
bool writes_color_targets_[4] = {false, false, false, false};
bool writes_depth_ = false;
uint8_t memexport_eM_written_[kMaxMemExports] = {};
std::set<uint32_t> memexport_stream_constants_;
std::set<uint32_t> label_addresses_;
uint32_t cf_pair_index_bound_ = 0;
uint32_t register_static_address_bound_ = 0;
bool uses_register_dynamic_addressing_ = false;
bool kills_pixels_ = false;
std::vector<uint32_t> memexport_stream_constants_;
bool writes_depth_ = false;
uint32_t writes_color_targets_ = 0b0000;
// Modification bits -> translation.
std::unordered_map<uint64_t, Translation*> translations_;
uint32_t ucode_storage_index_ = UINT32_MAX;
private:
void GatherExecInformation(
const ParsedExecInstruction& instr,
ucode::VertexFetchInstruction& previous_vfetch_full,
uint32_t& unique_texture_bindings, uint32_t memexport_alloc_current_count,
uint32_t& memexport_eA_written, StringBuffer& ucode_disasm_buffer);
void GatherVertexFetchInformation(
const ucode::VertexFetchInstruction& op,
ucode::VertexFetchInstruction& previous_vfetch_full,
StringBuffer& ucode_disasm_buffer);
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op,
uint32_t& unique_texture_bindings,
StringBuffer& ucode_disasm_buffer);
void GatherAluInstructionInformation(const ucode::AluInstruction& op,
uint32_t memexport_alloc_current_count,
uint32_t& memexport_eA_written,
StringBuffer& ucode_disasm_buffer);
void GatherOperandInformation(const InstructionOperand& operand);
void GatherFetchResultInformation(const InstructionResult& result);
void GatherAluResultInformation(const InstructionResult& result,
uint32_t memexport_alloc_current_count);
};
} // namespace gpu

View File

@ -17,6 +17,7 @@
#include "xenia/base/main.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h"
#include "xenia/base/string_buffer.h"
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/spirv_shader_translator.h"
@ -104,6 +105,8 @@ int shader_compiler_main(const std::vector<std::string>& args) {
auto shader = std::make_unique<Shader>(
shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size());
shader->AnalyzeUcode(StringBuffer());
std::unique_ptr<ShaderTranslator> translator;
if (cvars::shader_output_type == "spirv" ||
cvars::shader_output_type == "spirvtext") {
@ -114,7 +117,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
0, cvars::shader_output_bindless_resources,
cvars::shader_output_dxbc_rov);
} else {
translator = std::make_unique<UcodeShaderTranslator>();
// Just output microcode disassembly generated during microcode information
// gathering.
if (!cvars::shader_output.empty()) {
auto output_file = filesystem::OpenFile(cvars::shader_output, "wb");
fwrite(shader->ucode_disassembly().c_str(), 1,
shader->ucode_disassembly().length(), output_file);
fclose(output_file);
}
return 0;
}
Shader::HostVertexShaderType host_vertex_shader_type =
@ -140,12 +151,12 @@ int shader_compiler_main(const std::vector<std::string>& args) {
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
}
}
uint32_t modification =
translator->GetDefaultModification(shader_type, host_vertex_shader_type);
uint64_t modification = translator->GetDefaultModification(
shader_type, 64, host_vertex_shader_type);
Shader::Translation* translation =
shader->GetOrCreateTranslation(modification);
translator->Translate(*translation);
translator->TranslateAnalyzedShader(*translation);
const void* source_data = translation->translated_binary().data();
size_t source_data_size = translation->translated_binary().size();

File diff suppressed because it is too large

View File

@ -29,106 +29,43 @@ class ShaderTranslator {
public:
virtual ~ShaderTranslator();
virtual uint32_t GetDefaultModification(
virtual uint64_t GetDefaultModification(
xenos::ShaderType shader_type,
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex) const {
return 0;
}
bool Translate(Shader::Translation& translation, reg::SQ_PROGRAM_CNTL cntl);
bool Translate(Shader::Translation& translation);
// AnalyzeUcode must be done on the shader before translating!
bool TranslateAnalyzedShader(Shader::Translation& translation);
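A minimal caller sketch of the two-step flow, with `ucode_disasm_buffer`, `translator`, `shader`, and `regs` as illustrative stand-ins for the caller's own objects (the vertex-shader register field is used as the example):

// Analysis is shared by all modifications of the shader and must come first.
shader.AnalyzeUcode(ucode_disasm_buffer);
// The dynamically addressable r# count is part of the modification bits.
uint64_t modification = translator.GetDefaultModification(
    shader.type(),
    shader.GetDynamicAddressableRegisterCount(
        regs.sq_program_cntl.vs_num_reg));
Shader::Translation* translation = shader.GetOrCreateTranslation(modification);
if (!translation->is_translated() &&
    !translator.TranslateAnalyzedShader(*translation)) {
  // Handle the translation failure.
}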
protected:
ShaderTranslator();
// Resets translator state before beginning translation.
// shader_type is passed here so translator implementations can generate
// special fixed shaders for internal use, and set up the type for this
// purpose.
virtual void Reset(xenos::ShaderType shader_type);
virtual void Reset();
// Current host-side modification being generated.
uint32_t modification() const { return modification_; }
// Shader and modification currently being translated.
Shader::Translation& current_translation() const { return *translation_; }
Shader& current_shader() const { return current_translation().shader(); }
// Register count from SQ_PROGRAM_CNTL, stored by the implementation in its
// modification bits.
virtual uint32_t GetModificationRegisterCount() const { return 64; }
// Register count.
uint32_t register_count() const { return register_count_; }
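One plausible way a backend could store that count in its modification bits - purely illustrative, not necessarily the layout any translator in this commit uses:

// Hypothetical packed modification layout: the low byte carries the
// dynamically addressable r# count, the remaining bits are backend-specific.
uint32_t GetModificationRegisterCount() const override {
  return uint32_t(current_translation().modification() & 0xFF);
}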
// True if the current shader is a vertex shader.
bool is_vertex_shader() const {
return shader_type_ == xenos::ShaderType::kVertex;
return current_shader().type() == xenos::ShaderType::kVertex;
}
// True if the current shader is a pixel shader.
bool is_pixel_shader() const {
return shader_type_ == xenos::ShaderType::kPixel;
}
// Addresses of labels that jumps (explicit or from loops) can target,
// gathered before translation.
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
// Used constant register info, populated before translation.
const Shader::ConstantRegisterMap& constant_register_map() const {
return constant_register_map_;
}
// True if the current shader addresses general-purpose registers with dynamic
// indices, set before translation. Doesn't include writes to r[#+a#] with an
// empty used write mask.
bool uses_register_dynamic_addressing() const {
return uses_register_dynamic_addressing_;
}
// True if the current shader writes to a color target on any execution path,
// set before translation. Doesn't include writes with an empty used write
// mask.
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
bool writes_any_color_target() const {
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
if (writes_color_targets_[i]) {
return true;
}
}
return false;
}
// True if the current shader overrides the pixel depth, set before
// translation. Doesn't include writes with an empty used write mask.
bool writes_depth() const { return writes_depth_; }
// True if the current shader has any `kill` instructions.
bool kills_pixels() const { return kills_pixels_; }
// A list of all vertex bindings, populated before translation occurs.
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
return vertex_bindings_;
}
// A list of all texture bindings, populated before translation occurs.
const std::vector<Shader::TextureBinding>& texture_bindings() const {
return texture_bindings_;
return current_shader().type() == xenos::ShaderType::kPixel;
}
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
// .pdb.
static constexpr uint32_t kMaxMemExports = 16;
// Bits indicating which eM# registers have been written to after each
// `alloc export`, for up to kMaxMemExports exports. This will contain zero
// for corrupt exports - ones that don't write to eA before writing to eM#, or
// whose eA write was done in any way other than a MAD with a stream constant.
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
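A small sketch of how a consumer might walk these bits; it assumes eM0-eM4 are the five export-data registers, with one bit per register in each byte:

// For every `alloc export`, each set bit marks an eM# register that was
// actually written, so the backend only needs to handle those.
const uint8_t* eM_written = memexport_eM_written();
for (uint32_t export_index = 0; export_index < kMaxMemExports;
     ++export_index) {
  uint8_t written = eM_written[export_index];
  for (uint32_t i = 0; i < 5; ++i) {  // eM0..eM4.
    if (written & (1 << i)) {
      // Reserve storage for eM<i> of this export.
    }
  }
}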
// All c# registers used as the addend in MAD operations to eA, populated
// before translation occurs.
const std::set<uint32_t>& memexport_stream_constants() const {
return memexport_stream_constants_;
}
// Temporary register count, accessible via static and dynamic addressing.
uint32_t register_count() const { return register_count_; }
// Whether the shader can have early depth and stencil writing enabled, unless
// alpha test or alpha to coverage is enabled. Data gathered before
// translation.
bool CanWriteZEarly() const {
// TODO(Triang3l): Investigate what happens to memexport when the pixel
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
// depth/stencil.
return !writes_depth_ && !kills_pixels_ &&
memexport_stream_constants_.empty();
}
// Current line number in the ucode disassembly.
size_t ucode_disasm_line_number() const { return ucode_disasm_line_number_; }
// Ucode disassembly buffer accumulated during translation.
StringBuffer& ucode_disasm_buffer() { return ucode_disasm_buffer_; }
// Emits a translation error that will be passed back in the result.
virtual void EmitTranslationError(const char* message, bool is_fatal = true);
@ -143,10 +80,7 @@ class ShaderTranslator {
}
// Handles post-translation tasks when the shader has been fully translated.
// setup_shader_post_translation_info if non-modification-specific parameters
// of the Shader object behind the Translation can be set by this invocation.
virtual void PostTranslation(Shader::Translation& translation,
bool setup_shader_post_translation_info) {}
virtual void PostTranslation() {}
// Sets the host disassembly on a shader.
void set_host_disassembly(Shader::Translation& translation,
std::string value) {
@ -201,130 +135,23 @@ class ShaderTranslator {
virtual void ProcessAluInstruction(const ParsedAluInstruction& instr) {}
private:
struct AluOpcodeInfo {
const char* name;
uint32_t argument_count;
uint32_t src_swizzle_component_count;
};
bool TranslateInternal(Shader::Translation& translation);
void MarkUcodeInstruction(uint32_t dword_offset);
void AppendUcodeDisasm(char c);
void AppendUcodeDisasm(const char* value);
void AppendUcodeDisasmFormat(const char* format, ...);
void GatherInstructionInformation(const ucode::ControlFlowInstruction& cf);
void GatherVertexFetchInformation(const ucode::VertexFetchInstruction& op);
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op);
void TranslateControlFlowInstruction(const ucode::ControlFlowInstruction& cf);
void TranslateControlFlowNop(const ucode::ControlFlowInstruction& cf);
void TranslateControlFlowExec(const ucode::ControlFlowExecInstruction& cf);
void TranslateControlFlowCondExec(
const ucode::ControlFlowCondExecInstruction& cf);
void TranslateControlFlowCondExecPred(
const ucode::ControlFlowCondExecPredInstruction& cf);
void TranslateControlFlowLoopStart(
const ucode::ControlFlowLoopStartInstruction& cf);
void TranslateControlFlowLoopEnd(
const ucode::ControlFlowLoopEndInstruction& cf);
void TranslateControlFlowCondCall(
const ucode::ControlFlowCondCallInstruction& cf);
void TranslateControlFlowReturn(
const ucode::ControlFlowReturnInstruction& cf);
void TranslateControlFlowCondJmp(
const ucode::ControlFlowCondJmpInstruction& cf);
void TranslateControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf);
void TranslateExecInstructions(const ParsedExecInstruction& instr);
void TranslateVertexFetchInstruction(const ucode::VertexFetchInstruction& op);
void ParseVertexFetchInstruction(const ucode::VertexFetchInstruction& op,
ParsedVertexFetchInstruction* out_instr);
void TranslateTextureFetchInstruction(
const ucode::TextureFetchInstruction& op);
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
ParsedTextureFetchInstruction* out_instr);
void TranslateAluInstruction(const ucode::AluInstruction& op);
void ParseAluInstruction(const ucode::AluInstruction& op,
ParsedAluInstruction& out_instr) const;
static void ParseAluInstructionOperand(const ucode::AluInstruction& op,
uint32_t i,
uint32_t swizzle_component_count,
InstructionOperand& out_op);
static void ParseAluInstructionOperandSpecial(
const ucode::AluInstruction& op, InstructionStorageSource storage_source,
uint32_t reg, bool negate, int const_slot, uint32_t component_index,
InstructionOperand& out_op);
// Input shader metadata and microcode.
xenos::ShaderType shader_type_;
const uint32_t* ucode_dwords_;
size_t ucode_dword_count_;
uint32_t register_count_;
// Current host-side modification being generated.
uint32_t modification_ = 0;
// Current shader and modification being translated.
Shader::Translation* translation_ = nullptr;
// Accumulated translation errors.
std::vector<Shader::Error> errors_;
// Temporary register count, accessible via static and dynamic addressing.
uint32_t register_count_ = 0;
// Current control flow dword index.
uint32_t cf_index_ = 0;
// Microcode disassembly buffer, accumulated throughout the translation.
StringBuffer ucode_disasm_buffer_;
// Current line number in the disasm, which can be used for source annotation.
size_t ucode_disasm_line_number_ = 0;
// Last offset used when scanning for line numbers.
size_t previous_ucode_disasm_scan_offset_ = 0;
// Kept for supporting vfetch_mini.
ucode::VertexFetchInstruction previous_vfetch_full_;
// Addresses of labels that jumps (explicit or from loops) can target,
// gathered before translation.
std::set<uint32_t> label_addresses_;
// Detected binding information gathered before translation. Must not be
// affected by the modification index.
int total_attrib_count_ = 0;
std::vector<Shader::VertexBinding> vertex_bindings_;
std::vector<Shader::TextureBinding> texture_bindings_;
uint32_t unique_vertex_bindings_ = 0;
uint32_t unique_texture_bindings_ = 0;
// These all are gathered before translation.
// uses_register_dynamic_addressing_ for writes, writes_color_targets_,
// writes_depth_ don't include empty used write masks.
// Must not be affected by the modification index.
Shader::ConstantRegisterMap constant_register_map_ = {0};
bool uses_register_dynamic_addressing_ = false;
bool writes_color_targets_[4] = {false, false, false, false};
bool writes_depth_ = false;
bool kills_pixels_ = false;
// Memexport info is gathered before translation.
// Must not be affected by the modification index.
uint32_t memexport_alloc_count_ = 0;
// For register allocation in implementations - what was used after each
// `alloc export`.
uint32_t memexport_eA_written_ = 0;
uint8_t memexport_eM_written_[kMaxMemExports] = {0};
std::set<uint32_t> memexport_stream_constants_;
static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];
static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40];
};
class UcodeShaderTranslator : public ShaderTranslator {
public:
UcodeShaderTranslator() = default;
protected:
std::vector<uint8_t> CompleteTranslation() override;
};
} // namespace gpu

View File

@ -203,7 +203,9 @@ void SpirvShaderTranslator::StartTranslation() {
push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant,
push_constants_type, "push_consts");
if (!texture_bindings().empty()) {
const std::vector<Shader::TextureBinding>& texture_bindings =
current_shader().texture_bindings();
if (!texture_bindings.empty()) {
image_2d_type_ =
b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown);
@ -220,7 +222,7 @@ void SpirvShaderTranslator::StartTranslation() {
b.makeSampledImageType(image_cube_type_)};
uint32_t num_tex_bindings = 0;
for (const auto& binding : texture_bindings()) {
for (const auto& binding : texture_bindings) {
// Calculate the highest binding index.
num_tex_bindings =
std::max(num_tex_bindings, uint32_t(binding.binding_index + 1));
@ -241,7 +243,7 @@ void SpirvShaderTranslator::StartTranslation() {
}
// Set up the map from binding -> ssbo index
for (const auto& binding : texture_bindings()) {
for (const auto& binding : texture_bindings) {
tex_binding_map_[binding.fetch_constant] =
uint32_t(binding.binding_index);
}
@ -254,7 +256,9 @@ void SpirvShaderTranslator::StartTranslation() {
// Vertex inputs/outputs
// Inputs: 32 SSBOs on DS 2 binding 0
if (!vertex_bindings().empty()) {
const std::vector<Shader::VertexBinding>& vertex_bindings =
current_shader().vertex_bindings();
if (!vertex_bindings.empty()) {
// Runtime array for vertex data
Id vtx_t = b.makeRuntimeArray(uint_type_);
b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride,
@ -269,7 +273,7 @@ void SpirvShaderTranslator::StartTranslation() {
// Create the vertex bindings variable.
Id vtx_a_t = b.makeArrayType(
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings().size())), 0);
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings.size())), 0);
vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t,
"vertex_bindings");
@ -279,7 +283,7 @@ void SpirvShaderTranslator::StartTranslation() {
b.addDecoration(vtx_, spv::Decoration::DecorationNonWritable);
// Set up the map from binding -> ssbo index
for (const auto& binding : vertex_bindings()) {
for (const auto& binding : vertex_bindings) {
vtx_binding_map_[binding.fetch_constant] = binding.binding_index;
}
}
@ -494,7 +498,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
// If we write a new depth value, we must declare this mode!
if (writes_depth()) {
if (current_shader().writes_depth()) {
b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing);
}
@ -667,8 +671,12 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
return spirv_bytes;
}
void SpirvShaderTranslator::PostTranslation(
Shader::Translation& translation, bool setup_shader_post_translation_info) {
void SpirvShaderTranslator::PostTranslation() {
Shader::Translation& translation = current_translation();
if (!translation.is_valid()) {
return;
}
// Validation.
if (cvars::spv_validate) {
auto validation = validator_.Validate(

View File

@ -58,11 +58,23 @@ class SpirvShaderTranslator : public ShaderTranslator {
SpirvShaderTranslator();
~SpirvShaderTranslator() override;
// Not storing anything else in modifications (as this shader translator is
// being replaced anyway).
uint64_t GetDefaultModification(
xenos::ShaderType shader_type,
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex) const override {
return dynamic_addressable_register_count;
}
protected:
virtual uint32_t GetModificationRegisterCount() const {
return uint32_t(current_translation().modification());
}
void StartTranslation() override;
std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation(Shader::Translation& translation,
bool setup_shader_post_translation_info) override;
void PostTranslation() override;
void PreProcessControlFlowInstructions(
std::vector<ucode::ControlFlowInstruction> instrs) override;

View File

@ -431,15 +431,14 @@ XEPACKEDUNION(ControlFlowInstruction, {
static_assert_size(ControlFlowInstruction, 8);
inline void UnpackControlFlowInstructions(const uint32_t* dwords,
ControlFlowInstruction* out_a,
ControlFlowInstruction* out_b) {
ControlFlowInstruction* out_ab) {
uint32_t dword_0 = dwords[0];
uint32_t dword_1 = dwords[1];
uint32_t dword_2 = dwords[2];
out_a->dword_0 = dword_0;
out_a->dword_1 = dword_1 & 0xFFFF;
out_b->dword_0 = (dword_1 >> 16) | (dword_2 << 16);
out_b->dword_1 = dword_2 >> 16;
out_ab[0].dword_0 = dword_0;
out_ab[0].dword_1 = dword_1 & 0xFFFF;
out_ab[1].dword_0 = (dword_1 >> 16) | (dword_2 << 16);
out_ab[1].dword_1 = dword_2 >> 16;
}
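For reference, a caller sketch of the packed stream: each pair of 48-bit control-flow instructions occupies three dwords, so the pointer advances by 3 per pair (`ucode_dwords` and `cf_pair_count` are illustrative names):

// Illustrative loop over the control-flow portion of the ucode.
ControlFlowInstruction cf_pair[2];
for (uint32_t pair_index = 0; pair_index < cf_pair_count; ++pair_index) {
  UnpackControlFlowInstructions(ucode_dwords + pair_index * 3, cf_pair);
  // cf_pair[0] and cf_pair[1] are now complete 48-bit instructions and can be
  // dispatched on their opcodes.
}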
enum class FetchOpcode : uint32_t {

View File

@ -364,10 +364,11 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
}
bool PipelineCache::TranslateShader(
VulkanShader::VulkanTranslation& translation, reg::SQ_PROGRAM_CNTL cntl) {
VulkanShader::VulkanTranslation& translation) {
translation.shader().AnalyzeUcode(ucode_disasm_buffer_);
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_->Translate(translation, cntl)) {
if (!shader_translator_->TranslateAnalyzedShader(translation)) {
XELOGE("Shader translation failed; marking shader as ignored");
return false;
}
@ -1071,9 +1072,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
static_cast<VulkanShader::VulkanTranslation*>(
vertex_shader->GetOrCreateTranslation(
shader_translator_->GetDefaultModification(
xenos::ShaderType::kVertex)));
xenos::ShaderType::kVertex,
vertex_shader->GetDynamicAddressableRegisterCount(
regs.sq_program_cntl.vs_num_reg))));
if (!vertex_shader_translation->is_translated() &&
!TranslateShader(*vertex_shader_translation, regs.sq_program_cntl)) {
!TranslateShader(*vertex_shader_translation)) {
XELOGE("Failed to translate the vertex shader!");
return UpdateStatus::kError;
}
@ -1083,9 +1086,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
pixel_shader_translation = static_cast<VulkanShader::VulkanTranslation*>(
pixel_shader->GetOrCreateTranslation(
shader_translator_->GetDefaultModification(
xenos::ShaderType::kPixel)));
xenos::ShaderType::kPixel,
pixel_shader->GetDynamicAddressableRegisterCount(
regs.sq_program_cntl.ps_num_reg))));
if (!pixel_shader_translation->is_translated() &&
!TranslateShader(*pixel_shader_translation, regs.sq_program_cntl)) {
!TranslateShader(*pixel_shader_translation)) {
XELOGE("Failed to translate the pixel shader!");
return UpdateStatus::kError;
}

View File

@ -12,6 +12,7 @@
#include <unordered_map>
#include "xenia/base/string_buffer.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
@ -78,8 +79,7 @@ class PipelineCache {
// state.
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
bool TranslateShader(VulkanShader::VulkanTranslation& translation,
reg::SQ_PROGRAM_CNTL cntl);
bool TranslateShader(VulkanShader::VulkanTranslation& translation);
void DumpShaderDisasmAMD(VkPipeline pipeline);
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
@ -92,6 +92,8 @@ class PipelineCache {
RegisterFile* register_file_ = nullptr;
ui::vulkan::VulkanDevice* device_ = nullptr;
// Temporary storage for AnalyzeUcode calls.
StringBuffer ucode_disasm_buffer_;
// Reusable shader translator.
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
// Disassembler used to get the SPIRV disasm. Only used in debug.

View File

@ -73,7 +73,7 @@ bool VulkanShader::VulkanTranslation::Prepare() {
}
Shader::Translation* VulkanShader::CreateTranslationInstance(
uint32_t modification) {
uint64_t modification) {
return new VulkanTranslation(*this, modification);
}

View File

@ -23,7 +23,7 @@ class VulkanShader : public Shader {
public:
class VulkanTranslation : public Translation {
public:
VulkanTranslation(VulkanShader& shader, uint32_t modification)
VulkanTranslation(VulkanShader& shader, uint64_t modification)
: Translation(shader, modification) {}
~VulkanTranslation() override;
@ -41,7 +41,7 @@ class VulkanShader : public Shader {
uint32_t dword_count);
protected:
Translation* CreateTranslationInstance(uint32_t modification) override;
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
ui::vulkan::VulkanDevice* device_ = nullptr;

View File

@ -546,33 +546,6 @@ inline int GetVertexFormatComponentCount(VertexFormat format) {
}
}
inline int GetVertexFormatSizeInWords(VertexFormat format) {
switch (format) {
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
case VertexFormat::k_16_16:
case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32:
case VertexFormat::k_32_FLOAT:
return 1;
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32:
case VertexFormat::k_32_32_FLOAT:
return 2;
case VertexFormat::k_32_32_32_FLOAT:
return 3;
case VertexFormat::k_32_32_32_32:
case VertexFormat::k_32_32_32_32_FLOAT:
return 4;
default:
assert_unhandled_case(format);
return 1;
}
}
inline uint32_t GetVertexFormatNeededWords(VertexFormat format,
uint32_t used_components) {
assert_zero(used_components & ~uint32_t(0b1111));