[GPU] Dynamic r# count via shader modifications + refactoring
This commit is contained in:
parent
b106aa88e6
commit
e6fa0ad139
|
@ -99,14 +99,11 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
|
|||
}
|
||||
|
||||
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
|
||||
const Shader* pixel_shader) const {
|
||||
if (pixel_shader == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t shader_writes_color_targets) const {
|
||||
auto& regs = *register_file_;
|
||||
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!pixel_shader->writes_color_target(i)) {
|
||||
if (!(shader_writes_color_targets & (1 << i))) {
|
||||
color_mask &= ~(0xF << (i * 4));
|
||||
}
|
||||
}
|
||||
|
@ -167,14 +164,18 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
|
||||
: D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
|
||||
uint32_t texture_count_vertex, sampler_count_vertex;
|
||||
vertex_shader->GetTextureBindings(texture_count_vertex);
|
||||
vertex_shader->GetSamplerBindings(sampler_count_vertex);
|
||||
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
|
||||
if (pixel_shader != nullptr) {
|
||||
pixel_shader->GetTextureBindings(texture_count_pixel);
|
||||
pixel_shader->GetSamplerBindings(sampler_count_pixel);
|
||||
}
|
||||
uint32_t texture_count_vertex =
|
||||
uint32_t(vertex_shader->GetTextureBindingsAfterTranslation().size());
|
||||
uint32_t sampler_count_vertex =
|
||||
uint32_t(vertex_shader->GetSamplerBindingsAfterTranslation().size());
|
||||
uint32_t texture_count_pixel =
|
||||
pixel_shader
|
||||
? uint32_t(pixel_shader->GetTextureBindingsAfterTranslation().size())
|
||||
: 0;
|
||||
uint32_t sampler_count_pixel =
|
||||
pixel_shader
|
||||
? uint32_t(pixel_shader->GetSamplerBindingsAfterTranslation().size())
|
||||
: 0;
|
||||
|
||||
// Better put the pixel texture/sampler in the lower bits probably because it
|
||||
// changes often.
|
||||
|
@ -383,33 +384,26 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
|
||||
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
|
||||
RootBindfulExtraParameterIndices& indices_out) {
|
||||
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
|
||||
if (pixel_shader != nullptr) {
|
||||
pixel_shader->GetTextureBindings(texture_count_pixel);
|
||||
pixel_shader->GetSamplerBindings(sampler_count_pixel);
|
||||
}
|
||||
uint32_t texture_count_vertex, sampler_count_vertex;
|
||||
vertex_shader->GetTextureBindings(texture_count_vertex);
|
||||
vertex_shader->GetSamplerBindings(sampler_count_vertex);
|
||||
|
||||
uint32_t index = kRootParameter_Bindful_Count_Base;
|
||||
if (texture_count_pixel != 0) {
|
||||
if (pixel_shader &&
|
||||
!pixel_shader->GetTextureBindingsAfterTranslation().empty()) {
|
||||
indices_out.textures_pixel = index++;
|
||||
} else {
|
||||
indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable;
|
||||
}
|
||||
if (sampler_count_pixel != 0) {
|
||||
if (pixel_shader &&
|
||||
!pixel_shader->GetSamplerBindingsAfterTranslation().empty()) {
|
||||
indices_out.samplers_pixel = index++;
|
||||
} else {
|
||||
indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable;
|
||||
}
|
||||
if (texture_count_vertex != 0) {
|
||||
if (!vertex_shader->GetTextureBindingsAfterTranslation().empty()) {
|
||||
indices_out.textures_vertex = index++;
|
||||
} else {
|
||||
indices_out.textures_vertex =
|
||||
RootBindfulExtraParameterIndices::kUnavailable;
|
||||
}
|
||||
if (sampler_count_vertex != 0) {
|
||||
if (!vertex_shader->GetSamplerBindingsAfterTranslation().empty()) {
|
||||
indices_out.samplers_vertex = index++;
|
||||
} else {
|
||||
indices_out.samplers_vertex =
|
||||
|
@ -1839,10 +1833,14 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
// Need a pixel shader in normal color mode.
|
||||
return false;
|
||||
}
|
||||
// Gather shader ucode information to get the color mask, which is needed by
|
||||
// the render target cache, and memexport configuration, and also get the
|
||||
// current shader modification bits.
|
||||
DxbcShaderTranslator::Modification vertex_shader_modification;
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification;
|
||||
if (!pipeline_cache_->GetCurrentShaderModifications(
|
||||
vertex_shader_modification, pixel_shader_modification)) {
|
||||
if (!pipeline_cache_->AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
vertex_shader, pixel_shader, vertex_shader_modification,
|
||||
pixel_shader_modification)) {
|
||||
return false;
|
||||
}
|
||||
D3D12Shader::D3D12Translation* vertex_shader_translation =
|
||||
|
@ -1854,13 +1852,6 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
pixel_shader->GetOrCreateTranslation(
|
||||
pixel_shader_modification.value))
|
||||
: nullptr;
|
||||
// Translate the shaders now to get memexport configuration and color mask,
|
||||
// which is needed by the render target cache, and also to get used textures
|
||||
// and samplers.
|
||||
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation,
|
||||
pixel_shader_translation)) {
|
||||
return false;
|
||||
}
|
||||
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
|
||||
|
@ -1889,7 +1880,10 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
BeginSubmission(true);
|
||||
|
||||
// Set up the render targets - this may bind pipelines.
|
||||
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
|
||||
uint32_t pixel_shader_writes_color_targets =
|
||||
pixel_shader ? pixel_shader->writes_color_targets() : 0;
|
||||
if (!render_target_cache_->UpdateRenderTargets(
|
||||
pixel_shader_writes_color_targets)) {
|
||||
return false;
|
||||
}
|
||||
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
||||
|
@ -1958,13 +1952,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
line_loop_closing_index = 0;
|
||||
}
|
||||
|
||||
// Update the textures - this may bind pipelines.
|
||||
uint32_t used_texture_mask =
|
||||
vertex_shader->GetUsedTextureMask() |
|
||||
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
texture_cache_->RequestTextures(used_texture_mask);
|
||||
|
||||
// Create the pipeline if needed and bind it.
|
||||
// Translate the shaders and create the pipeline if needed.
|
||||
void* pipeline_handle;
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
|
@ -1974,6 +1962,17 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
pipeline_render_targets, &pipeline_handle, &root_signature)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the textures - this may bind pipelines.
|
||||
uint32_t used_texture_mask =
|
||||
vertex_shader->GetUsedTextureMaskAfterTranslation() |
|
||||
(pixel_shader != nullptr
|
||||
? pixel_shader->GetUsedTextureMaskAfterTranslation()
|
||||
: 0);
|
||||
texture_cache_->RequestTextures(used_texture_mask);
|
||||
|
||||
// Bind the pipeline after configuring it and doing everything that may bind
|
||||
// other pipelines.
|
||||
if (current_cached_pipeline_ != pipeline_handle) {
|
||||
deferred_command_list_.SetPipelineStateHandle(
|
||||
reinterpret_cast<void*>(pipeline_handle));
|
||||
|
@ -2026,7 +2025,9 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
memexport_used, primitive_polygonal, line_loop_closing_index,
|
||||
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
|
||||
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask,
|
||||
GetCurrentColorMask(pixel_shader), pipeline_render_targets);
|
||||
pixel_shader ? GetCurrentColorMask(pixel_shader->writes_color_targets())
|
||||
: 0,
|
||||
pipeline_render_targets);
|
||||
|
||||
// Update constant buffers, descriptors and root parameters.
|
||||
if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
|
||||
|
@ -2089,9 +2090,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
MemExportRange memexport_ranges[512];
|
||||
uint32_t memexport_range_count = 0;
|
||||
if (memexport_used_vertex) {
|
||||
const std::vector<uint32_t>& memexport_stream_constants_vertex =
|
||||
vertex_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_vertex) {
|
||||
for (uint32_t constant_index :
|
||||
vertex_shader->memexport_stream_constants()) {
|
||||
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
|
||||
if (memexport_stream.index_count == 0) {
|
||||
|
@ -2132,9 +2132,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
}
|
||||
if (memexport_used_pixel) {
|
||||
const std::vector<uint32_t>& memexport_stream_constants_pixel =
|
||||
pixel_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_pixel) {
|
||||
for (uint32_t constant_index : pixel_shader->memexport_stream_constants()) {
|
||||
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
|
||||
if (memexport_stream.index_count == 0) {
|
||||
|
@ -3588,20 +3586,21 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
vertex_shader->GetTextureBindingLayoutUserUID();
|
||||
size_t sampler_layout_uid_vertex =
|
||||
vertex_shader->GetSamplerBindingLayoutUserUID();
|
||||
uint32_t texture_count_vertex, sampler_count_vertex;
|
||||
const D3D12Shader::TextureBinding* textures_vertex =
|
||||
vertex_shader->GetTextureBindings(texture_count_vertex);
|
||||
const D3D12Shader::SamplerBinding* samplers_vertex =
|
||||
vertex_shader->GetSamplerBindings(sampler_count_vertex);
|
||||
const std::vector<D3D12Shader::TextureBinding>& textures_vertex =
|
||||
vertex_shader->GetTextureBindingsAfterTranslation();
|
||||
const std::vector<D3D12Shader::SamplerBinding>& samplers_vertex =
|
||||
vertex_shader->GetSamplerBindingsAfterTranslation();
|
||||
size_t texture_count_vertex = textures_vertex.size();
|
||||
size_t sampler_count_vertex = samplers_vertex.size();
|
||||
if (sampler_count_vertex) {
|
||||
if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
|
||||
current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
|
||||
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
||||
bindful_samplers_written_vertex_ = false;
|
||||
}
|
||||
current_samplers_vertex_.resize(std::max(current_samplers_vertex_.size(),
|
||||
size_t(sampler_count_vertex)));
|
||||
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
current_samplers_vertex_.resize(
|
||||
std::max(current_samplers_vertex_.size(), sampler_count_vertex));
|
||||
for (size_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
TextureCache::SamplerParameters parameters =
|
||||
texture_cache_->GetSamplerParameters(samplers_vertex[i]);
|
||||
if (current_samplers_vertex_[i] != parameters) {
|
||||
|
@ -3615,14 +3614,16 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
// Get textures and samplers used by the pixel shader, check if the last used
|
||||
// samplers are compatible and update them.
|
||||
size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
|
||||
uint32_t texture_count_pixel, sampler_count_pixel;
|
||||
const D3D12Shader::TextureBinding* textures_pixel;
|
||||
const D3D12Shader::SamplerBinding* samplers_pixel;
|
||||
const std::vector<D3D12Shader::TextureBinding>* textures_pixel;
|
||||
const std::vector<D3D12Shader::SamplerBinding>* samplers_pixel;
|
||||
size_t texture_count_pixel, sampler_count_pixel;
|
||||
if (pixel_shader != nullptr) {
|
||||
texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
|
||||
sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
|
||||
textures_pixel = pixel_shader->GetTextureBindings(texture_count_pixel);
|
||||
samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel);
|
||||
textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
|
||||
texture_count_pixel = textures_pixel->size();
|
||||
samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
|
||||
sampler_count_pixel = samplers_pixel->size();
|
||||
if (sampler_count_pixel) {
|
||||
if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
|
||||
current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
|
||||
|
@ -3633,7 +3634,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
size_t(sampler_count_pixel)));
|
||||
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
TextureCache::SamplerParameters parameters =
|
||||
texture_cache_->GetSamplerParameters(samplers_pixel[i]);
|
||||
texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
|
||||
if (current_samplers_pixel_[i] != parameters) {
|
||||
current_samplers_pixel_[i] = parameters;
|
||||
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
||||
|
@ -3663,7 +3664,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
|
||||
(current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
|
||||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
|
||||
texture_count_vertex))) {
|
||||
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
||||
}
|
||||
|
@ -3671,7 +3672,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
|
||||
(current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
|
||||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
|
||||
texture_count_pixel))) {
|
||||
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
||||
}
|
||||
|
@ -3804,15 +3805,14 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
uint32_t* descriptor_indices =
|
||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
||||
frame_current_,
|
||||
std::max(texture_count_vertex + sampler_count_vertex,
|
||||
uint32_t(1)) *
|
||||
std::max(texture_count_vertex + sampler_count_vertex, size_t(1)) *
|
||||
sizeof(uint32_t),
|
||||
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||
&cbuffer_binding_descriptor_indices_vertex_.address));
|
||||
if (!descriptor_indices) {
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
|
||||
for (size_t i = 0; i < texture_count_vertex; ++i) {
|
||||
const D3D12Shader::TextureBinding& texture = textures_vertex[i];
|
||||
descriptor_indices[texture.bindless_descriptor_index] =
|
||||
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
|
||||
|
@ -3824,11 +3824,11 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
std::max(current_texture_srv_keys_vertex_.size(),
|
||||
size_t(texture_count_vertex)));
|
||||
texture_cache_->WriteActiveTextureSRVKeys(
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
|
||||
texture_count_vertex);
|
||||
}
|
||||
// Current samplers have already been updated.
|
||||
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
for (size_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
|
||||
current_sampler_bindless_indices_vertex_[i];
|
||||
}
|
||||
|
@ -3841,15 +3841,15 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
uint32_t* descriptor_indices =
|
||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
||||
frame_current_,
|
||||
std::max(texture_count_pixel + sampler_count_pixel, uint32_t(1)) *
|
||||
std::max(texture_count_pixel + sampler_count_pixel, size_t(1)) *
|
||||
sizeof(uint32_t),
|
||||
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||
&cbuffer_binding_descriptor_indices_pixel_.address));
|
||||
if (!descriptor_indices) {
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
|
||||
const D3D12Shader::TextureBinding& texture = textures_pixel[i];
|
||||
for (size_t i = 0; i < texture_count_pixel; ++i) {
|
||||
const D3D12Shader::TextureBinding& texture = (*textures_pixel)[i];
|
||||
descriptor_indices[texture.bindless_descriptor_index] =
|
||||
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
|
||||
uint32_t(SystemBindlessView::kUnboundedSRVsStart);
|
||||
|
@ -3860,12 +3860,12 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
std::max(current_texture_srv_keys_pixel_.size(),
|
||||
size_t(texture_count_pixel)));
|
||||
texture_cache_->WriteActiveTextureSRVKeys(
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
|
||||
texture_count_pixel);
|
||||
}
|
||||
// Current samplers have already been updated.
|
||||
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
descriptor_indices[samplers_pixel[i].bindless_descriptor_index] =
|
||||
for (size_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
descriptor_indices[(*samplers_pixel)[i].bindless_descriptor_index] =
|
||||
current_sampler_bindless_indices_pixel_[i];
|
||||
}
|
||||
cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
|
||||
|
@ -3884,14 +3884,14 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
(!bindful_textures_written_vertex_ ||
|
||||
current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
|
||||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
|
||||
texture_count_vertex));
|
||||
bool write_textures_pixel =
|
||||
texture_count_pixel &&
|
||||
(!bindful_textures_written_pixel_ ||
|
||||
current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
|
||||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
|
||||
texture_count_pixel));
|
||||
bool write_samplers_vertex =
|
||||
sampler_count_vertex && !bindful_samplers_written_vertex_;
|
||||
|
@ -3899,7 +3899,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
sampler_count_pixel && !bindful_samplers_written_pixel_;
|
||||
|
||||
// Allocate the descriptors.
|
||||
uint32_t view_count_partial_update = 0;
|
||||
size_t view_count_partial_update = 0;
|
||||
if (write_textures_vertex) {
|
||||
view_count_partial_update += texture_count_vertex;
|
||||
}
|
||||
|
@ -3907,7 +3907,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
view_count_partial_update += texture_count_pixel;
|
||||
}
|
||||
// All the constants + shared memory SRV and UAV + textures.
|
||||
uint32_t view_count_full_update =
|
||||
size_t view_count_full_update =
|
||||
2 + texture_count_vertex + texture_count_pixel;
|
||||
if (edram_rov_used_) {
|
||||
// + EDRAM UAV.
|
||||
|
@ -3917,14 +3917,14 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
|
||||
uint32_t descriptor_size_view = provider.GetViewDescriptorSize();
|
||||
uint64_t view_heap_index = RequestViewBindfulDescriptors(
|
||||
draw_view_bindful_heap_index_, view_count_partial_update,
|
||||
view_count_full_update, view_cpu_handle, view_gpu_handle);
|
||||
draw_view_bindful_heap_index_, uint32_t(view_count_partial_update),
|
||||
uint32_t(view_count_full_update), view_cpu_handle, view_gpu_handle);
|
||||
if (view_heap_index ==
|
||||
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
XELOGE("Failed to allocate view descriptors");
|
||||
return false;
|
||||
}
|
||||
uint32_t sampler_count_partial_update = 0;
|
||||
size_t sampler_count_partial_update = 0;
|
||||
if (write_samplers_vertex) {
|
||||
sampler_count_partial_update += sampler_count_vertex;
|
||||
}
|
||||
|
@ -3938,9 +3938,10 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
|
||||
if (sampler_count_vertex != 0 || sampler_count_pixel != 0) {
|
||||
sampler_heap_index = RequestSamplerBindfulDescriptors(
|
||||
draw_sampler_bindful_heap_index_, sampler_count_partial_update,
|
||||
sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle,
|
||||
sampler_gpu_handle);
|
||||
draw_sampler_bindful_heap_index_,
|
||||
uint32_t(sampler_count_partial_update),
|
||||
uint32_t(sampler_count_vertex + sampler_count_pixel),
|
||||
sampler_cpu_handle, sampler_gpu_handle);
|
||||
if (sampler_heap_index ==
|
||||
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
XELOGE("Failed to allocate sampler descriptors");
|
||||
|
@ -3985,7 +3986,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
assert_true(current_graphics_root_bindful_extras_.textures_vertex !=
|
||||
RootBindfulExtraParameterIndices::kUnavailable);
|
||||
gpu_handle_textures_vertex_ = view_gpu_handle;
|
||||
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
|
||||
for (size_t i = 0; i < texture_count_vertex; ++i) {
|
||||
texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i],
|
||||
view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
|
@ -3996,7 +3997,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
std::max(current_texture_srv_keys_vertex_.size(),
|
||||
size_t(texture_count_vertex)));
|
||||
texture_cache_->WriteActiveTextureSRVKeys(
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
||||
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
|
||||
texture_count_vertex);
|
||||
bindful_textures_written_vertex_ = true;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
|
@ -4006,8 +4007,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
assert_true(current_graphics_root_bindful_extras_.textures_pixel !=
|
||||
RootBindfulExtraParameterIndices::kUnavailable);
|
||||
gpu_handle_textures_pixel_ = view_gpu_handle;
|
||||
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
|
||||
texture_cache_->WriteActiveTextureBindfulSRV(textures_pixel[i],
|
||||
for (size_t i = 0; i < texture_count_pixel; ++i) {
|
||||
texture_cache_->WriteActiveTextureBindfulSRV((*textures_pixel)[i],
|
||||
view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
|
@ -4016,7 +4017,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
current_texture_srv_keys_pixel_.resize(std::max(
|
||||
current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel)));
|
||||
texture_cache_->WriteActiveTextureSRVKeys(
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
||||
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
|
||||
texture_count_pixel);
|
||||
bindful_textures_written_pixel_ = true;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
|
@ -4026,7 +4027,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
assert_true(current_graphics_root_bindful_extras_.samplers_vertex !=
|
||||
RootBindfulExtraParameterIndices::kUnavailable);
|
||||
gpu_handle_samplers_vertex_ = sampler_gpu_handle;
|
||||
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
for (size_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
texture_cache_->WriteSampler(current_samplers_vertex_[i],
|
||||
sampler_cpu_handle);
|
||||
sampler_cpu_handle.ptr += descriptor_size_sampler;
|
||||
|
@ -4041,7 +4042,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
assert_true(current_graphics_root_bindful_extras_.samplers_pixel !=
|
||||
RootBindfulExtraParameterIndices::kUnavailable);
|
||||
gpu_handle_samplers_pixel_ = sampler_gpu_handle;
|
||||
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
for (size_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
texture_cache_->WriteSampler(current_samplers_pixel_[i],
|
||||
sampler_cpu_handle);
|
||||
sampler_cpu_handle.ptr += descriptor_size_sampler;
|
||||
|
|
|
@ -89,7 +89,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// there are 4 render targets bound with the same EDRAM base (clearly not
|
||||
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
||||
// conflict with each other.
|
||||
uint32_t GetCurrentColorMask(const Shader* pixel_shader) const;
|
||||
uint32_t GetCurrentColorMask(uint32_t shader_writes_color_targets) const;
|
||||
|
||||
void PushTransitionBarrier(
|
||||
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
||||
|
|
|
@ -99,7 +99,7 @@ void D3D12Shader::D3D12Translation::DisassembleDxbcAndDxil(
|
|||
}
|
||||
|
||||
Shader::Translation* D3D12Shader::CreateTranslationInstance(
|
||||
uint32_t modification) {
|
||||
uint64_t modification) {
|
||||
return new D3D12Translation(*this, modification);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ class D3D12Shader : public DxbcShader {
|
|||
public:
|
||||
class D3D12Translation : public DxbcTranslation {
|
||||
public:
|
||||
D3D12Translation(D3D12Shader& shader, uint32_t modification)
|
||||
D3D12Translation(D3D12Shader& shader, uint64_t modification)
|
||||
: DxbcTranslation(shader, modification) {}
|
||||
|
||||
void DisassembleDxbcAndDxil(const ui::d3d12::D3D12Provider& provider,
|
||||
|
@ -60,7 +60,7 @@ class D3D12Shader : public DxbcShader {
|
|||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
Translation* CreateTranslationInstance(uint64_t modification) override;
|
||||
|
||||
private:
|
||||
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <mutex>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
|
@ -29,6 +30,7 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
|
@ -265,7 +267,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
// collect used shader modifications to translate.
|
||||
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
|
||||
// <Shader hash, modification bits>.
|
||||
std::set<std::pair<uint64_t, uint32_t>> shader_translations_needed;
|
||||
std::set<std::pair<uint64_t, uint64_t>> shader_translations_needed;
|
||||
auto pipeline_storage_file_path =
|
||||
shader_storage_shareable_root /
|
||||
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
|
||||
|
@ -292,7 +294,6 @@ void PipelineCache::InitializeShaderStorage(
|
|||
uint32_t magic;
|
||||
uint32_t magic_api;
|
||||
uint32_t version_swapped;
|
||||
uint32_t device_features;
|
||||
} pipeline_storage_file_header;
|
||||
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_) &&
|
||||
|
@ -331,6 +332,9 @@ void PipelineCache::InitializeShaderStorage(
|
|||
pipeline_stored_descriptions.resize(i);
|
||||
break;
|
||||
}
|
||||
// TODO(Triang3l): On Vulkan, skip pipelines requiring unsupported
|
||||
// device features (to keep the cache files mostly shareable across
|
||||
// devices).
|
||||
// Mark the shader modifications as needed for translation.
|
||||
shader_translations_needed.emplace(
|
||||
pipeline_stored_description.description.vertex_shader_hash,
|
||||
|
@ -391,14 +395,14 @@ void PipelineCache::InitializeShaderStorage(
|
|||
// Threads overlapping file reading.
|
||||
std::mutex shaders_translation_thread_mutex;
|
||||
std::condition_variable shaders_translation_thread_cond;
|
||||
std::deque<std::pair<ShaderStoredHeader, D3D12Shader::D3D12Translation*>>
|
||||
shaders_to_translate;
|
||||
std::deque<D3D12Shader*> shaders_to_translate;
|
||||
size_t shader_translation_threads_busy = 0;
|
||||
bool shader_translation_threads_shutdown = false;
|
||||
std::mutex shaders_failed_to_translate_mutex;
|
||||
std::vector<D3D12Shader::D3D12Translation*> shaders_failed_to_translate;
|
||||
auto shader_translation_thread_function = [&]() {
|
||||
auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
|
||||
StringBuffer ucode_disasm_buffer;
|
||||
DxbcShaderTranslator translator(
|
||||
provider.GetAdapterVendorID(), bindless_resources_used_,
|
||||
edram_rov_used_, provider.GetGraphicsAnalysis() != nullptr);
|
||||
|
@ -416,8 +420,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
IID_PPV_ARGS(&dxc_compiler));
|
||||
}
|
||||
for (;;) {
|
||||
std::pair<ShaderStoredHeader, D3D12Shader::D3D12Translation*>
|
||||
shader_to_translate;
|
||||
D3D12Shader* shader_to_translate;
|
||||
for (;;) {
|
||||
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
if (shaders_to_translate.empty()) {
|
||||
|
@ -432,12 +435,29 @@ void PipelineCache::InitializeShaderStorage(
|
|||
++shader_translation_threads_busy;
|
||||
break;
|
||||
}
|
||||
assert_not_null(shader_to_translate.second);
|
||||
if (!TranslateShader(translator, *shader_to_translate.second,
|
||||
shader_to_translate.first.sq_program_cntl,
|
||||
dxbc_converter, dxc_utils, dxc_compiler)) {
|
||||
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
|
||||
shaders_failed_to_translate.push_back(shader_to_translate.second);
|
||||
shader_to_translate->AnalyzeUcode(ucode_disasm_buffer);
|
||||
// Translate each needed modification on this thread after performing
|
||||
// modification-independent analysis of the whole shader.
|
||||
uint64_t ucode_data_hash = shader_to_translate->ucode_data_hash();
|
||||
for (auto modification_it = shader_translations_needed.lower_bound(
|
||||
std::make_pair(ucode_data_hash, uint64_t(0)));
|
||||
modification_it != shader_translations_needed.end() &&
|
||||
modification_it->first == ucode_data_hash;
|
||||
++modification_it) {
|
||||
D3D12Shader::D3D12Translation* translation =
|
||||
static_cast<D3D12Shader::D3D12Translation*>(
|
||||
shader_to_translate->GetOrCreateTranslation(
|
||||
modification_it->second));
|
||||
// Only try (and delete in case of failure) if it's a new translation.
|
||||
// If it's a shader previously encountered in the game, translation of
|
||||
// which has failed, and the shader storage is loaded later, keep it
|
||||
// this way not to try to translate it again.
|
||||
if (!translation->is_translated() &&
|
||||
!TranslateAnalyzedShader(translator, *translation, dxbc_converter,
|
||||
dxc_utils, dxc_compiler)) {
|
||||
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
|
||||
shaders_failed_to_translate.push_back(translation);
|
||||
}
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
|
@ -477,59 +497,41 @@ void PipelineCache::InitializeShaderStorage(
|
|||
break;
|
||||
}
|
||||
shader_storage_valid_bytes += sizeof(shader_header) + ucode_byte_count;
|
||||
// Only add the shader if needed.
|
||||
auto modification_it = shader_translations_needed.lower_bound(
|
||||
std::make_pair(ucode_data_hash, uint32_t(0)));
|
||||
if (modification_it == shader_translations_needed.end() ||
|
||||
modification_it->first != ucode_data_hash) {
|
||||
continue;
|
||||
}
|
||||
D3D12Shader* shader =
|
||||
LoadShader(shader_header.type, ucode_dwords.data(),
|
||||
shader_header.ucode_dword_count, ucode_data_hash);
|
||||
if (shader->ucode_storage_index() == shader_storage_index_) {
|
||||
// Appeared twice in this file for some reason - skip, otherwise race
|
||||
// condition will be caused by translating twice in parallel.
|
||||
continue;
|
||||
}
|
||||
// Loaded from the current storage - don't write again.
|
||||
shader->set_ucode_storage_index(shader_storage_index_);
|
||||
// Translate all the needed modifications.
|
||||
for (; modification_it != shader_translations_needed.end() &&
|
||||
modification_it->first == ucode_data_hash;
|
||||
++modification_it) {
|
||||
bool translation_is_new;
|
||||
D3D12Shader::D3D12Translation* translation =
|
||||
static_cast<D3D12Shader::D3D12Translation*>(
|
||||
shader->GetOrCreateTranslation(modification_it->second,
|
||||
&translation_is_new));
|
||||
if (!translation_is_new) {
|
||||
// Already added - usually shaders aren't added without the intention
|
||||
// of translating them imminently, so don't do additional checks to
|
||||
// actually ensure that translation happens right now (they would
|
||||
// cause a race condition with shaders currently queued for
|
||||
// translation).
|
||||
continue;
|
||||
}
|
||||
// Create new threads if the currently existing threads can't keep up
|
||||
// with file reading, but not more than the number of logical processors
|
||||
// minus one.
|
||||
size_t shader_translation_threads_needed;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
shader_translation_threads_needed =
|
||||
std::min(shader_translation_threads_busy +
|
||||
shaders_to_translate.size() + size_t(1),
|
||||
logical_processor_count - size_t(1));
|
||||
}
|
||||
while (shader_translation_threads.size() <
|
||||
shader_translation_threads_needed) {
|
||||
shader_translation_threads.push_back(xe::threading::Thread::Create(
|
||||
{}, shader_translation_thread_function));
|
||||
shader_translation_threads.back()->set_name("Shader Translation");
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
shaders_to_translate.emplace_back(shader_header, translation);
|
||||
}
|
||||
shaders_translation_thread_cond.notify_one();
|
||||
++shaders_translated;
|
||||
// Create new threads if the currently existing threads can't keep up
|
||||
// with file reading, but not more than the number of logical processors
|
||||
// minus one.
|
||||
size_t shader_translation_threads_needed;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
shader_translation_threads_needed =
|
||||
std::min(shader_translation_threads_busy +
|
||||
shaders_to_translate.size() + size_t(1),
|
||||
logical_processor_count - size_t(1));
|
||||
}
|
||||
while (shader_translation_threads.size() <
|
||||
shader_translation_threads_needed) {
|
||||
shader_translation_threads.push_back(xe::threading::Thread::Create(
|
||||
{}, shader_translation_thread_function));
|
||||
shader_translation_threads.back()->set_name("Shader Translation");
|
||||
}
|
||||
// Request ucode information gathering and translation of all the needed
|
||||
// shaders.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
|
||||
shaders_to_translate.push_back(shader);
|
||||
}
|
||||
shaders_translation_thread_cond.notify_one();
|
||||
++shaders_translated;
|
||||
}
|
||||
if (!shader_translation_threads.empty()) {
|
||||
{
|
||||
|
@ -593,6 +595,8 @@ void PipelineCache::InitializeShaderStorage(
|
|||
pipeline_stored_descriptions) {
|
||||
const PipelineDescription& pipeline_description =
|
||||
pipeline_stored_description.description;
|
||||
// TODO(Triang3l): On Vulkan, skip pipelines requiring unsupported device
|
||||
// features (to keep the cache files mostly shareable across devices).
|
||||
// Skip already known pipelines - those have already been enqueued.
|
||||
auto found_range =
|
||||
pipelines_.equal_range(pipeline_stored_description.description_hash);
|
||||
|
@ -621,6 +625,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
vertex_shader->GetTranslation(
|
||||
pipeline_description.vertex_shader_modification));
|
||||
if (!pipeline_runtime_description.vertex_shader ||
|
||||
!pipeline_runtime_description.vertex_shader->is_translated() ||
|
||||
!pipeline_runtime_description.vertex_shader->is_valid()) {
|
||||
continue;
|
||||
}
|
||||
|
@ -637,6 +642,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
pixel_shader->GetTranslation(
|
||||
pipeline_description.pixel_shader_modification));
|
||||
if (!pipeline_runtime_description.pixel_shader ||
|
||||
!pipeline_runtime_description.pixel_shader->is_translated() ||
|
||||
!pipeline_runtime_description.pixel_shader->is_valid()) {
|
||||
continue;
|
||||
}
|
||||
|
@ -730,9 +736,6 @@ void PipelineCache::InitializeShaderStorage(
|
|||
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
|
||||
pipeline_storage_file_header.version_swapped =
|
||||
pipeline_storage_version_swapped;
|
||||
// Reserved for future (for Vulkan) - host device features affecting legal
|
||||
// pipeline descriptions.
|
||||
pipeline_storage_file_header.device_features = 0;
|
||||
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_);
|
||||
}
|
||||
|
@ -854,52 +857,68 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
return shader;
|
||||
}
|
||||
|
||||
// Derives the translator modifications for the given vertex and pixel shaders
// from the current register state.
// Returns false, without writing the outputs, when
// GetCurrentHostVertexShaderTypeIfValid() yields HostVertexShaderType(-1);
// otherwise fills both *_modification_out parameters and returns true.
// The lines that call AnalyzeUcode() run it on the vertex shader and, when
// pixel_shader is non-null, on the pixel shader before the register counts
// are queried; a null pixel_shader gets the default pixel modification with a
// dynamic addressable register count of 0.
// NOTE(review): this span comes from a diff hunk and appears to contain both
// the earlier body (GetCurrentShaderModifications, const) and the later one
// without markers - confirm which lines are live before relying on details.
bool PipelineCache::GetCurrentShaderModifications(
|
||||
bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out) const {
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out) {
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
GetCurrentHostVertexShaderTypeIfValid();
|
||||
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
|
||||
return false;
|
||||
}
|
||||
const auto& regs = register_file_;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
|
||||
vertex_shader->AnalyzeUcode(ucode_disasm_buffer_);
|
||||
vertex_shader_modification_out = DxbcShaderTranslator::Modification(
|
||||
shader_translator_->GetDefaultModification(xenos::ShaderType::kVertex,
|
||||
host_vertex_shader_type));
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification(
|
||||
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel));
|
||||
if (!edram_rov_used_) {
|
||||
const auto& regs = register_file_;
|
||||
using DepthStencilMode =
|
||||
DxbcShaderTranslator::Modification::DepthStencilMode;
|
||||
if ((depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputTruncating ||
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputRounding) &&
|
||||
regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputTruncating
|
||||
? DepthStencilMode::kFloat24Truncating
|
||||
: DepthStencilMode::kFloat24Rounding;
|
||||
} else {
|
||||
// Hint to enable early depth/stencil writing if possible - whether it
|
||||
// will actually take effect depends on the shader itself, it's not known
|
||||
// before translation.
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
if ((!rb_colorcontrol.alpha_test_enable ||
|
||||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable) {
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kVertex,
|
||||
vertex_shader->GetDynamicAddressableRegisterCount(
|
||||
sq_program_cntl.vs_num_reg),
|
||||
host_vertex_shader_type));
|
||||
|
||||
// The depth/stencil mode below is only chosen when edram_rov_used_ is false.
|
||||
if (pixel_shader) {
|
||||
pixel_shader->AnalyzeUcode(ucode_disasm_buffer_);
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification(
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kPixel,
|
||||
pixel_shader->GetDynamicAddressableRegisterCount(
|
||||
sq_program_cntl.ps_num_reg)));
|
||||
if (!edram_rov_used_) {
|
||||
using DepthStencilMode =
|
||||
DxbcShaderTranslator::Modification::DepthStencilMode;
|
||||
if ((depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputTruncating ||
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputRounding) &&
|
||||
regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
DepthStencilMode::kEarlyHint;
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputTruncating
|
||||
? DepthStencilMode::kFloat24Truncating
|
||||
: DepthStencilMode::kFloat24Rounding;
|
||||
} else {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
DepthStencilMode::kNoModifiers;
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
if (pixel_shader->implicit_early_z_write_allowed() &&
|
||||
(!rb_colorcontrol.alpha_test_enable ||
|
||||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable) {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
DepthStencilMode::kEarlyHint;
|
||||
} else {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
DepthStencilMode::kNoModifiers;
|
||||
}
|
||||
}
|
||||
}
|
||||
pixel_shader_modification_out = pixel_shader_modification;
|
||||
} else {
|
||||
pixel_shader_modification_out = DxbcShaderTranslator::Modification(
|
||||
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel,
|
||||
0));
|
||||
}
|
||||
pixel_shader_modification_out = pixel_shader_modification;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -979,62 +998,6 @@ PipelineCache::GetCurrentHostVertexShaderTypeIfValid() const {
|
|||
return Shader::HostVertexShaderType(-1);
|
||||
}
|
||||
|
||||
// Translates the vertex shader and, when one is given, the pixel shader if
// they have not been translated yet, using the current SQ_PROGRAM_CNTL value.
// vertex_shader must be non-null (it is dereferenced unconditionally);
// pixel_shader may be null.
// Returns false as soon as a translation fails (after logging an error),
// true otherwise.
// After a successful translation, if a shader storage file is open and the
// shader is not yet tagged with the current storage index, the shader is
// tagged and queued together with sq_program_cntl for the storage write
// thread.
bool PipelineCache::EnsureShadersTranslated(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader) {
|
||||
const auto& regs = register_file_;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
|
||||
// Edge flags are not supported yet (because polygon primitives are not).
|
||||
assert_true(sq_program_cntl.vs_export_mode !=
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
|
||||
sq_program_cntl.vs_export_mode !=
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
|
||||
assert_false(sq_program_cntl.gen_index_vtx);
|
||||
|
||||
if (!vertex_shader->is_translated()) {
|
||||
if (!TranslateShader(*shader_translator_, *vertex_shader, sq_program_cntl,
|
||||
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
|
||||
XELOGE("Failed to translate the vertex shader!");
|
||||
return false;
|
||||
}
|
||||
// Queue the ucode for writing only if it is not already tagged as belonging
|
||||
// to the currently open storage.
|
||||
if (shader_storage_file_ && vertex_shader->shader().ucode_storage_index() !=
|
||||
shader_storage_index_) {
|
||||
vertex_shader->shader().set_ucode_storage_index(shader_storage_index_);
|
||||
assert_not_null(storage_write_thread_);
|
||||
shader_storage_file_flush_needed_ = true;
|
||||
{
|
||||
// The queue is shared with the storage write thread - hold the lock only
|
||||
// for the push.
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_shader_queue_.push_back(
|
||||
std::make_pair(&vertex_shader->shader(), sq_program_cntl));
|
||||
}
|
||||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
// Same sequence for the pixel shader, skipped entirely when drawing without
|
||||
// one.
|
||||
if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
|
||||
if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl,
|
||||
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
|
||||
XELOGE("Failed to translate the pixel shader!");
|
||||
return false;
|
||||
}
|
||||
if (shader_storage_file_ &&
|
||||
pixel_shader->shader().ucode_storage_index() != shader_storage_index_) {
|
||||
pixel_shader->shader().set_ucode_storage_index(shader_storage_index_);
|
||||
assert_not_null(storage_write_thread_);
|
||||
shader_storage_file_flush_needed_ = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_shader_queue_.push_back(
|
||||
std::make_pair(&pixel_shader->shader(), sq_program_cntl));
|
||||
}
|
||||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PipelineCache::ConfigurePipeline(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
|
@ -1078,8 +1041,50 @@ bool PipelineCache::ConfigurePipeline(
|
|||
}
|
||||
}
|
||||
|
||||
if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) {
|
||||
return false;
|
||||
// Ensure shaders are translated.
|
||||
// Edge flags are not supported yet (because polygon primitives are not).
|
||||
assert_true(register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
|
||||
register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
|
||||
assert_false(register_file_.Get<reg::SQ_PROGRAM_CNTL>().gen_index_vtx);
|
||||
if (!vertex_shader->is_translated()) {
|
||||
vertex_shader->shader().AnalyzeUcode(ucode_disasm_buffer_);
|
||||
if (!TranslateAnalyzedShader(*shader_translator_, *vertex_shader,
|
||||
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
|
||||
XELOGE("Failed to translate the vertex shader!");
|
||||
return false;
|
||||
}
|
||||
if (shader_storage_file_ && vertex_shader->shader().ucode_storage_index() !=
|
||||
shader_storage_index_) {
|
||||
vertex_shader->shader().set_ucode_storage_index(shader_storage_index_);
|
||||
assert_not_null(storage_write_thread_);
|
||||
shader_storage_file_flush_needed_ = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_shader_queue_.push_back(&vertex_shader->shader());
|
||||
}
|
||||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
}
|
||||
if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
|
||||
pixel_shader->shader().AnalyzeUcode(ucode_disasm_buffer_);
|
||||
if (!TranslateAnalyzedShader(*shader_translator_, *pixel_shader,
|
||||
dxbc_converter_, dxc_utils_, dxc_compiler_)) {
|
||||
XELOGE("Failed to translate the pixel shader!");
|
||||
return false;
|
||||
}
|
||||
if (shader_storage_file_ &&
|
||||
pixel_shader->shader().ucode_storage_index() != shader_storage_index_) {
|
||||
pixel_shader->shader().set_ucode_storage_index(shader_storage_index_);
|
||||
assert_not_null(storage_write_thread_);
|
||||
shader_storage_file_flush_needed_ = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_shader_queue_.push_back(&pixel_shader->shader());
|
||||
}
|
||||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
Pipeline* new_pipeline = new Pipeline;
|
||||
|
@ -1121,17 +1126,15 @@ bool PipelineCache::ConfigurePipeline(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
|
||||
D3D12Shader::D3D12Translation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl,
|
||||
IDxbcConverter* dxbc_converter,
|
||||
IDxcUtils* dxc_utils,
|
||||
IDxcCompiler* dxc_compiler) {
|
||||
bool PipelineCache::TranslateAnalyzedShader(
|
||||
DxbcShaderTranslator& translator,
|
||||
D3D12Shader::D3D12Translation& translation, IDxbcConverter* dxbc_converter,
|
||||
IDxcUtils* dxc_utils, IDxcCompiler* dxc_compiler) {
|
||||
D3D12Shader& shader = static_cast<D3D12Shader&>(translation.shader());
|
||||
|
||||
// Perform translation.
|
||||
// If this fails the shader will be marked as invalid and ignored later.
|
||||
if (!translator.Translate(translation, cntl)) {
|
||||
if (!translator.TranslateAnalyzedShader(translation)) {
|
||||
XELOGE("Shader {:016X} translation failed; marking as ignored",
|
||||
shader.ucode_data_hash());
|
||||
return false;
|
||||
|
@ -1171,21 +1174,21 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
|
|||
|
||||
// Set up texture and sampler binding layouts.
|
||||
if (shader.EnterBindingLayoutUserUIDSetup()) {
|
||||
uint32_t texture_binding_count;
|
||||
const D3D12Shader::TextureBinding* texture_bindings =
|
||||
shader.GetTextureBindings(texture_binding_count);
|
||||
uint32_t sampler_binding_count;
|
||||
const D3D12Shader::SamplerBinding* sampler_bindings =
|
||||
shader.GetSamplerBindings(sampler_binding_count);
|
||||
const std::vector<D3D12Shader::TextureBinding>& texture_bindings =
|
||||
shader.GetTextureBindingsAfterTranslation();
|
||||
uint32_t texture_binding_count = uint32_t(texture_bindings.size());
|
||||
const std::vector<D3D12Shader::SamplerBinding>& sampler_bindings =
|
||||
shader.GetSamplerBindingsAfterTranslation();
|
||||
uint32_t sampler_binding_count = uint32_t(sampler_bindings.size());
|
||||
assert_false(bindless_resources_used_ &&
|
||||
texture_binding_count + sampler_binding_count >
|
||||
D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4);
|
||||
size_t texture_binding_layout_bytes =
|
||||
texture_binding_count * sizeof(*texture_bindings);
|
||||
texture_binding_count * sizeof(*texture_bindings.data());
|
||||
uint64_t texture_binding_layout_hash = 0;
|
||||
if (texture_binding_count) {
|
||||
texture_binding_layout_hash =
|
||||
XXH3_64bits(texture_bindings, texture_binding_layout_bytes);
|
||||
XXH3_64bits(texture_bindings.data(), texture_binding_layout_bytes);
|
||||
}
|
||||
uint32_t bindless_sampler_count =
|
||||
bindless_resources_used_ ? sampler_binding_count : 0;
|
||||
|
@ -1223,7 +1226,8 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
|
|||
if (it->second.vector_span_length == texture_binding_count &&
|
||||
!std::memcmp(texture_binding_layouts_.data() +
|
||||
it->second.vector_span_offset,
|
||||
texture_bindings, texture_binding_layout_bytes)) {
|
||||
texture_bindings.data(),
|
||||
texture_binding_layout_bytes)) {
|
||||
texture_binding_layout_uid = it->second.uid;
|
||||
break;
|
||||
}
|
||||
|
@ -1242,7 +1246,7 @@ bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator,
|
|||
texture_binding_count);
|
||||
std::memcpy(
|
||||
texture_binding_layouts_.data() + new_uid.vector_span_offset,
|
||||
texture_bindings, texture_binding_layout_bytes);
|
||||
texture_bindings.data(), texture_binding_layout_bytes);
|
||||
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
|
||||
new_uid);
|
||||
}
|
||||
|
@ -1576,8 +1580,10 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
|
||||
// Render targets and blending state. 32 because of 0x1F mask, for safety
|
||||
// (all unknown to zero).
|
||||
uint32_t color_mask = command_processor_.GetCurrentColorMask(
|
||||
pixel_shader ? &pixel_shader->shader() : nullptr);
|
||||
uint32_t color_mask =
|
||||
pixel_shader ? command_processor_.GetCurrentColorMask(
|
||||
pixel_shader->shader().writes_color_targets())
|
||||
: 0;
|
||||
static const PipelineBlendFactor kBlendFactorMap[32] = {
|
||||
/* 0 */ PipelineBlendFactor::kZero,
|
||||
/* 1 */ PipelineBlendFactor::kOne,
|
||||
|
@ -2038,7 +2044,7 @@ void PipelineCache::StorageWriteThread() {
|
|||
fflush(pipeline_storage_file_);
|
||||
}
|
||||
|
||||
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
|
||||
const Shader* shader = nullptr;
|
||||
PipelineStoredDescription pipeline_description;
|
||||
bool write_pipeline = false;
|
||||
{
|
||||
|
@ -2047,7 +2053,7 @@ void PipelineCache::StorageWriteThread() {
|
|||
return;
|
||||
}
|
||||
if (!storage_write_shader_queue_.empty()) {
|
||||
shader_pair = storage_write_shader_queue_.front();
|
||||
shader = storage_write_shader_queue_.front();
|
||||
storage_write_shader_queue_.pop_front();
|
||||
} else if (storage_write_flush_shaders_) {
|
||||
storage_write_flush_shaders_ = false;
|
||||
|
@ -2063,18 +2069,16 @@ void PipelineCache::StorageWriteThread() {
|
|||
storage_write_flush_pipelines_ = false;
|
||||
flush_pipelines = true;
|
||||
}
|
||||
if (!shader_pair.first && !write_pipeline) {
|
||||
if (!shader && !write_pipeline) {
|
||||
storage_write_request_cond_.wait(lock);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const Shader* shader = shader_pair.first;
|
||||
if (shader) {
|
||||
shader_header.ucode_data_hash = shader->ucode_data_hash();
|
||||
shader_header.ucode_dword_count = shader->ucode_dword_count();
|
||||
shader_header.type = shader->type();
|
||||
shader_header.sq_program_cntl = shader_pair.second;
|
||||
assert_not_null(shader_storage_file_);
|
||||
fwrite(&shader_header, sizeof(shader_header), 1, shader_storage_file_);
|
||||
if (shader_header.ucode_dword_count) {
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/base/threading.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
|
@ -63,15 +64,12 @@ class PipelineCache {
|
|||
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
|
||||
// Retrieves the shader modifications for the current state, and returns
|
||||
// whether they are valid.
|
||||
bool GetCurrentShaderModifications(
|
||||
// Ensures microcode is analyzed, retrieves the shader modifications for the
|
||||
// current state, and returns whether they are valid.
|
||||
bool AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out) const;
|
||||
|
||||
// Translates shaders if needed, also making shader info up to date.
|
||||
bool EnsureShadersTranslated(D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader);
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out);
|
||||
|
||||
bool ConfigurePipeline(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
|
@ -93,9 +91,7 @@ class PipelineCache {
|
|||
uint32_t ucode_dword_count : 31;
|
||||
xenos::ShaderType type : 1;
|
||||
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20201207;
|
||||
static constexpr uint32_t kVersion = 0x20201219;
|
||||
});
|
||||
|
||||
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
|
||||
|
@ -171,10 +167,10 @@ class PipelineCache {
|
|||
|
||||
XEPACKEDSTRUCT(PipelineDescription, {
|
||||
uint64_t vertex_shader_hash;
|
||||
uint64_t vertex_shader_modification;
|
||||
// 0 if drawing without a pixel shader.
|
||||
uint64_t pixel_shader_hash;
|
||||
uint32_t vertex_shader_modification;
|
||||
uint32_t pixel_shader_modification;
|
||||
uint64_t pixel_shader_modification;
|
||||
|
||||
int32_t depth_bias;
|
||||
float depth_bias_slope_scaled;
|
||||
|
@ -208,7 +204,7 @@ class PipelineCache {
|
|||
|
||||
PipelineRenderTarget render_targets[4];
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20201207;
|
||||
static constexpr uint32_t kVersion = 0x20201219;
|
||||
});
|
||||
|
||||
XEPACKEDSTRUCT(PipelineStoredDescription, {
|
||||
|
@ -232,12 +228,11 @@ class PipelineCache {
|
|||
uint64_t data_hash);
|
||||
|
||||
// Can be called from multiple threads.
|
||||
bool TranslateShader(DxbcShaderTranslator& translator,
|
||||
D3D12Shader::D3D12Translation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl,
|
||||
IDxbcConverter* dxbc_converter = nullptr,
|
||||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
bool TranslateAnalyzedShader(DxbcShaderTranslator& translator,
|
||||
D3D12Shader::D3D12Translation& translation,
|
||||
IDxbcConverter* dxbc_converter = nullptr,
|
||||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
|
||||
bool GetCurrentStateDescription(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
|
@ -257,7 +252,9 @@ class PipelineCache {
|
|||
flags::DepthFloat24Conversion depth_float24_conversion_;
|
||||
uint32_t resolution_scale_;
|
||||
|
||||
// Reusable shader translator.
|
||||
// Temporary storage for AnalyzeUcode calls on the processor thread.
|
||||
StringBuffer ucode_disasm_buffer_;
|
||||
// Reusable shader translator for the processor thread.
|
||||
std::unique_ptr<DxbcShaderTranslator> shader_translator_;
|
||||
|
||||
// Command processor thread DXIL conversion/disassembly interfaces, if DXIL
|
||||
|
@ -332,8 +329,7 @@ class PipelineCache {
|
|||
std::condition_variable storage_write_request_cond_;
|
||||
// Storage thread input is protected with storage_write_request_lock_, and the
|
||||
// thread is notified about its change via storage_write_request_cond_.
|
||||
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
|
||||
storage_write_shader_queue_;
|
||||
std::deque<const Shader*> storage_write_shader_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
|
||||
bool storage_write_flush_shaders_ = false;
|
||||
bool storage_write_flush_pipelines_ = false;
|
||||
|
|
|
@ -535,7 +535,8 @@ void RenderTargetCache::EndFrame() {
|
|||
FlushAndUnbindRenderTargets();
|
||||
}
|
||||
|
||||
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
||||
bool RenderTargetCache::UpdateRenderTargets(
|
||||
uint32_t shader_writes_color_targets) {
|
||||
// There are two kinds of render target binding updates in this implementation
|
||||
// in case something has been changed - full and partial.
|
||||
//
|
||||
|
@ -635,7 +636,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
uint32_t edram_bases[5];
|
||||
uint32_t formats[5];
|
||||
bool formats_are_64bpp[5];
|
||||
uint32_t color_mask = command_processor_.GetCurrentColorMask(pixel_shader);
|
||||
uint32_t color_mask =
|
||||
command_processor_.GetCurrentColorMask(shader_writes_color_targets);
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
|
|
|
@ -269,7 +269,7 @@ class RenderTargetCache {
|
|||
void EndFrame();
|
||||
// Called in the beginning of a draw call - may bind pipelines and change the
|
||||
// view descriptor heap.
|
||||
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
|
||||
bool UpdateRenderTargets(uint32_t shader_writes_color_targets);
|
||||
// Returns the host-to-guest mappings and host formats of currently bound
|
||||
// render targets for pipeline creation and remapping in shaders. They are
|
||||
// consecutive, and format DXGI_FORMAT_UNKNOWN terminates the list. Depth
|
||||
|
|
|
@ -1334,8 +1334,8 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
bool TextureCache::AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const {
|
||||
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
size_t host_shader_binding_count) const {
|
||||
for (size_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
const TextureSRVKey& key = keys[i];
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[host_shader_bindings[i].fetch_constant];
|
||||
|
@ -1350,8 +1350,8 @@ bool TextureCache::AreActiveTextureSRVKeysUpToDate(
|
|||
void TextureCache::WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const {
|
||||
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
size_t host_shader_binding_count) const {
|
||||
for (size_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
TextureSRVKey& key = keys[i];
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[host_shader_bindings[i].fetch_constant];
|
||||
|
|
|
@ -196,14 +196,14 @@ class TextureCache {
|
|||
bool AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const;
|
||||
size_t host_shader_binding_count) const;
|
||||
// Exports the current binding data to texture SRV keys so they can be stored
|
||||
// for checking whether subsequent draw calls can keep using the same
|
||||
// bindings. Write host_shader_binding_count keys.
|
||||
void WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const;
|
||||
size_t host_shader_binding_count) const;
|
||||
// Returns the post-swizzle signedness of a currently bound texture (must be
|
||||
// called after RequestTextures).
|
||||
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {
|
||||
|
|
|
@ -19,7 +19,7 @@ DxbcShader::DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
|||
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
|
||||
|
||||
// Allocates a new DxbcTranslation bound to this shader and the given
// modification value and returns it as a base Shader::Translation pointer.
// NOTE(review): the object is created with plain `new`; presumably the caller
// (the Shader base class) takes ownership - confirm.
Shader::Translation* DxbcShader::CreateTranslationInstance(
|
||||
uint32_t modification) {
|
||||
uint64_t modification) {
|
||||
return new DxbcTranslation(*this, modification);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#ifndef XENIA_GPU_DXBC_SHADER_H_
|
||||
#define XENIA_GPU_DXBC_SHADER_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
|
@ -23,13 +24,17 @@ class DxbcShader : public Shader {
|
|||
public:
|
||||
// Translation type of DxbcShader. It declares no members of its own here and
// only forwards the owning shader and the modification value to the
// Translation base constructor.
class DxbcTranslation : public Translation {
|
||||
public:
|
||||
DxbcTranslation(DxbcShader& shader, uint32_t modification)
|
||||
DxbcTranslation(DxbcShader& shader, uint64_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
};
|
||||
|
||||
DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||
|
||||
// Resource bindings are gathered after the successful translation of any
|
||||
// modification for simplicity of translation (and they don't depend on
|
||||
// modification bits).
|
||||
|
||||
static constexpr uint32_t kMaxTextureBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
|
||||
static constexpr uint32_t kMaxTextureBindings =
|
||||
|
@ -43,11 +48,13 @@ class DxbcShader : public Shader {
|
|||
bool is_signed;
|
||||
};
|
||||
// Safe to hash and compare with memcmp for layout hashing.
|
||||
// Read-only access to the texture_bindings_ vector. One signature shown here
// returns the data pointer and writes the element count to count_out; the
// other returns a const reference to the vector itself.
// NOTE(review): presumably only meaningful once a translation has filled the
// bindings, as the "AfterTranslation" name suggests - confirm.
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_bindings_.size());
|
||||
return texture_bindings_.data();
|
||||
const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation()
|
||||
const {
|
||||
return texture_bindings_;
|
||||
}
|
||||
// Returns the value of used_texture_mask_ (initialized to 0 in the member
// declaration). Both accessor names shown here return the same member.
// NOTE(review): presumably a bit mask of the textures referenced by the
// shader, valid after translation - confirm the bit meaning.
const uint32_t GetUsedTextureMaskAfterTranslation() const {
|
||||
return used_texture_mask_;
|
||||
}
|
||||
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
|
||||
|
||||
static constexpr uint32_t kMaxSamplerBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
|
||||
|
@ -61,17 +68,18 @@ class DxbcShader : public Shader {
|
|||
xenos::TextureFilter mip_filter;
|
||||
xenos::AnisoFilter aniso_filter;
|
||||
};
|
||||
// Read-only access to the sampler_bindings_ vector. One signature shown here
// returns the data pointer and writes the element count to count_out; the
// other returns a const reference to the vector itself.
// NOTE(review): presumably only meaningful once a translation has filled the
// bindings, as the "AfterTranslation" name suggests - confirm.
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(sampler_bindings_.size());
|
||||
return sampler_bindings_.data();
|
||||
const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation()
|
||||
const {
|
||||
return sampler_bindings_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
Translation* CreateTranslationInstance(uint64_t modification) override;
|
||||
|
||||
private:
|
||||
friend class DxbcShaderTranslator;
|
||||
|
||||
std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT;
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
uint32_t used_texture_mask_ = 0;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
|
@ -78,16 +79,23 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
|||
DxbcShaderTranslator::~DxbcShaderTranslator() = default;
|
||||
|
||||
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
|
||||
Reset(xenos::ShaderType::kPixel);
|
||||
is_depth_only_pixel_shader_ = true;
|
||||
StartTranslation();
|
||||
return std::move(CompleteTranslation());
|
||||
// TODO(Triang3l): Handle in a nicer way (is_depth_only_pixel_shader_ is a
|
||||
// leftover from when a Shader object wasn't used during translation).
|
||||
Shader shader(xenos::ShaderType::kPixel, 0, nullptr, 0);
|
||||
shader.AnalyzeUcode(instruction_disassembly_buffer_);
|
||||
Shader::Translation& translation = *shader.GetOrCreateTranslation(0);
|
||||
TranslateAnalyzedShader(translation);
|
||||
is_depth_only_pixel_shader_ = false;
|
||||
return translation.translated_binary();
|
||||
}
|
||||
|
||||
uint32_t DxbcShaderTranslator::GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
uint64_t DxbcShaderTranslator::GetDefaultModification(
|
||||
xenos::ShaderType shader_type, uint32_t dynamic_addressable_register_count,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type) const {
|
||||
Modification shader_modification;
|
||||
shader_modification.dynamic_addressable_register_count =
|
||||
dynamic_addressable_register_count;
|
||||
switch (shader_type) {
|
||||
case xenos::ShaderType::kVertex:
|
||||
shader_modification.host_vertex_shader_type = host_vertex_shader_type;
|
||||
|
@ -100,13 +108,11 @@ uint32_t DxbcShaderTranslator::GetDefaultModification(
|
|||
return shader_modification.value;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::Reset(xenos::ShaderType shader_type) {
|
||||
ShaderTranslator::Reset(shader_type);
|
||||
void DxbcShaderTranslator::Reset() {
|
||||
ShaderTranslator::Reset();
|
||||
|
||||
shader_code_.clear();
|
||||
|
||||
is_depth_only_pixel_shader_ = false;
|
||||
|
||||
cbuffer_count_ = 0;
|
||||
// System constants always used in prologues/epilogues.
|
||||
cbuffer_index_system_constants_ = cbuffer_count_++;
|
||||
|
@ -231,6 +237,10 @@ void DxbcShaderTranslator::DxbcSrc::Write(std::vector<uint32_t>& code,
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t DxbcShaderTranslator::GetModificationRegisterCount() const {
|
||||
return GetDxbcShaderModification().dynamic_addressable_register_count;
|
||||
}
|
||||
|
||||
bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
|
||||
// Xenia crashes on Intel HD Graphics 4000 with switch.
|
||||
return cvars::dxbc_switch && vendor_id_ != 0x8086;
|
||||
|
@ -239,7 +249,8 @@ bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
|
|||
uint32_t DxbcShaderTranslator::PushSystemTemp(uint32_t zero_mask,
|
||||
uint32_t count) {
|
||||
uint32_t register_index = system_temp_count_current_;
|
||||
if (!uses_register_dynamic_addressing() && !is_depth_only_pixel_shader_) {
|
||||
if (!is_depth_only_pixel_shader_ &&
|
||||
!current_shader().uses_register_dynamic_addressing()) {
|
||||
// Guest shader registers first if they're not in x0. Depth-only pixel
|
||||
// shader is a special case of the DXBC translator usage, where there are no
|
||||
// GPRs because there's no shader to translate, and a guest shader is not
|
||||
|
@ -327,10 +338,13 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
return;
|
||||
}
|
||||
|
||||
bool uses_register_dynamic_addressing =
|
||||
current_shader().uses_register_dynamic_addressing();
|
||||
|
||||
// Writing the index to X of GPR 0 - either directly if not using indexable
|
||||
// registers, or via a system temporary register.
|
||||
uint32_t reg;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
reg = PushSystemTemp();
|
||||
} else {
|
||||
reg = 0;
|
||||
|
@ -392,7 +406,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
DxbcOpBreak();
|
||||
DxbcOpEndSwitch();
|
||||
|
||||
if (!uses_register_dynamic_addressing()) {
|
||||
if (!uses_register_dynamic_addressing) {
|
||||
// Break register dependency.
|
||||
DxbcOpMov(swap_temp_dest, DxbcSrc::LF(0.0f));
|
||||
}
|
||||
|
@ -409,7 +423,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
// Convert to float.
|
||||
DxbcOpIToF(index_dest, index_src);
|
||||
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
// Store to indexed GPR 0 in x0[0].
|
||||
DxbcOpMov(DxbcDest::X(0, 0, 0b0001), index_src);
|
||||
PopSystemTemp();
|
||||
|
@ -417,6 +431,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||
bool uses_register_dynamic_addressing =
|
||||
current_shader().uses_register_dynamic_addressing();
|
||||
|
||||
// Zero the interpolators.
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i),
|
||||
|
@ -438,13 +455,13 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
// Copy the domain location to r0.xyz.
|
||||
// ZYX swizzle according to Call of Duty 3 and Viva Pinata.
|
||||
in_domain_location_used_ |= 0b0111;
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0111)
|
||||
: DxbcDest::R(0, 0b0111),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111)
|
||||
: DxbcDest::R(0, 0b0111),
|
||||
DxbcSrc::VDomain(0b000110));
|
||||
if (register_count() >= 2) {
|
||||
// Copy the control point indices (already swapped and converted to
|
||||
// float by the host vertex and hull shaders) to r1.xyz.
|
||||
DxbcDest control_point_index_dest(uses_register_dynamic_addressing()
|
||||
DxbcDest control_point_index_dest(uses_register_dynamic_addressing
|
||||
? DxbcDest::X(0, 1)
|
||||
: DxbcDest::R(1));
|
||||
in_control_point_index_used_ = true;
|
||||
|
@ -465,16 +482,16 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
// ZYX swizzle with r1.y == 0, according to the water shader in
|
||||
// Banjo-Kazooie: Nuts & Bolts.
|
||||
in_domain_location_used_ |= 0b0111;
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0111)
|
||||
: DxbcDest::R(0, 0b0111),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111)
|
||||
: DxbcDest::R(0, 0b0111),
|
||||
DxbcSrc::VDomain(0b000110));
|
||||
if (register_count() >= 2) {
|
||||
// Copy the primitive index to r1.x as a float.
|
||||
uint32_t primitive_id_temp =
|
||||
uses_register_dynamic_addressing() ? PushSystemTemp() : 1;
|
||||
uses_register_dynamic_addressing ? PushSystemTemp() : 1;
|
||||
in_primitive_id_used_ = true;
|
||||
DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim());
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
DxbcOpMov(DxbcDest::X(0, 1, 0b0001),
|
||||
DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX));
|
||||
// Release primitive_id_temp.
|
||||
|
@ -499,9 +516,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
//
|
||||
// Direct3D 12 passes the coordinates in a consistent order, so can
|
||||
// just use the identity swizzle.
|
||||
DxbcOpMov(uses_register_dynamic_addressing()
|
||||
? DxbcDest::X(0, 1, 0b0010)
|
||||
: DxbcDest::R(1, 0b0010),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0010)
|
||||
: DxbcDest::R(1, 0b0010),
|
||||
DxbcSrc::LF(0.0f));
|
||||
}
|
||||
}
|
||||
|
@ -512,8 +528,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
if (register_count() >= 1) {
|
||||
// Copy the domain location to r0.xy.
|
||||
in_domain_location_used_ |= 0b0011;
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0011)
|
||||
: DxbcDest::R(0, 0b0011),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0011)
|
||||
: DxbcDest::R(0, 0b0011),
|
||||
DxbcSrc::VDomain());
|
||||
// Control point indices according to the shader from the main menu of
|
||||
// Defender, which starts from `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz`,
|
||||
|
@ -524,14 +540,13 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
// r1.z for (1 - r0.x) * r0.y
|
||||
in_control_point_index_used_ = true;
|
||||
DxbcOpMov(
|
||||
uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0100)
|
||||
: DxbcDest::R(0, 0b0100),
|
||||
uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0100)
|
||||
: DxbcDest::R(0, 0b0100),
|
||||
DxbcSrc::VICP(0, uint32_t(InOutRegister::kDSInControlPointIndex),
|
||||
DxbcSrc::kXXXX));
|
||||
if (register_count() >= 2) {
|
||||
DxbcDest r1_dest(uses_register_dynamic_addressing()
|
||||
? DxbcDest::X(0, 1)
|
||||
: DxbcDest::R(1));
|
||||
DxbcDest r1_dest(uses_register_dynamic_addressing ? DxbcDest::X(0, 1)
|
||||
: DxbcDest::R(1));
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
DxbcOpMov(
|
||||
r1_dest.Mask(1 << i),
|
||||
|
@ -549,15 +564,15 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
// Copy the domain location to r0.yz.
|
||||
// XY swizzle according to the ground shader in Viva Pinata.
|
||||
in_domain_location_used_ |= 0b0011;
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, 0, 0b0110)
|
||||
: DxbcDest::R(0, 0b0110),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0110)
|
||||
: DxbcDest::R(0, 0b0110),
|
||||
DxbcSrc::VDomain(0b010000));
|
||||
// Copy the primitive index to r0.x as a float.
|
||||
uint32_t primitive_id_temp =
|
||||
uses_register_dynamic_addressing() ? PushSystemTemp() : 0;
|
||||
uses_register_dynamic_addressing ? PushSystemTemp() : 0;
|
||||
in_primitive_id_used_ = true;
|
||||
DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim());
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
DxbcOpMov(DxbcDest::X(0, 0, 0b0001),
|
||||
DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX));
|
||||
// Release primitive_id_temp.
|
||||
|
@ -578,9 +593,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
//
|
||||
// Direct3D 12 passes the coordinates in a consistent order, so can
|
||||
// just use the identity swizzle.
|
||||
DxbcOpMov(uses_register_dynamic_addressing()
|
||||
? DxbcDest::X(0, 1, 0b0001)
|
||||
: DxbcDest::R(1, 0b0001),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0001)
|
||||
: DxbcDest::R(1, 0b0001),
|
||||
DxbcSrc::LF(0.0f));
|
||||
}
|
||||
}
|
||||
|
@ -611,7 +625,10 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
return;
|
||||
}
|
||||
|
||||
if (!edram_rov_used_ && writes_depth()) {
|
||||
bool uses_register_dynamic_addressing =
|
||||
current_shader().uses_register_dynamic_addressing();
|
||||
|
||||
if (!edram_rov_used_ && current_shader().writes_depth()) {
|
||||
// Initialize the depth output if used, which must be written to regardless
|
||||
// of the taken execution path.
|
||||
DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f));
|
||||
|
@ -623,7 +640,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// Copy interpolants to GPRs.
|
||||
if (edram_rov_used_) {
|
||||
uint32_t centroid_temp =
|
||||
uses_register_dynamic_addressing() ? PushSystemTemp() : UINT32_MAX;
|
||||
uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX;
|
||||
system_constants_used_ |= 1ull
|
||||
<< kSysConst_InterpolatorSamplingPattern_Index;
|
||||
DxbcSrc sampling_pattern_src(
|
||||
|
@ -635,7 +652,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// With GPR dynamic addressing, first evaluate to centroid_temp r#, then
|
||||
// store to the x#.
|
||||
uint32_t centroid_register =
|
||||
uses_register_dynamic_addressing() ? centroid_temp : i;
|
||||
uses_register_dynamic_addressing ? centroid_temp : i;
|
||||
// Check if the input needs to be interpolated at center (if the bit is
|
||||
// set).
|
||||
DxbcOpAnd(DxbcDest::R(centroid_register, 0b0001), sampling_pattern_src,
|
||||
|
@ -643,8 +660,8 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
DxbcOpIf(bool(xenos::SampleLocation::kCenter),
|
||||
DxbcSrc::R(centroid_register, DxbcSrc::kXXXX));
|
||||
// At center.
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
|
||||
DxbcOpElse();
|
||||
// At centroid. Not really important that 2x MSAA is emulated using
|
||||
|
@ -653,7 +670,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
DxbcOpEvalCentroid(
|
||||
DxbcDest::R(centroid_register),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
DxbcOpMov(DxbcDest::X(0, i), DxbcSrc::R(centroid_register));
|
||||
}
|
||||
DxbcOpEndIf();
|
||||
|
@ -665,8 +682,8 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// SSAA instead of MSAA without ROV - everything is interpolated at
|
||||
// samples, can't extrapolate.
|
||||
for (uint32_t i = 0; i < interpolator_count; ++i) {
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i));
|
||||
}
|
||||
}
|
||||
|
@ -781,7 +798,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
}
|
||||
// Write ps_param_gen to the specified GPR.
|
||||
DxbcSrc param_gen_src(DxbcSrc::R(param_gen_temp));
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (uses_register_dynamic_addressing) {
|
||||
// Copy the GPR number to r# for relative addressing.
|
||||
uint32_t param_gen_copy_temp = PushSystemTemp();
|
||||
DxbcOpMov(DxbcDest::R(param_gen_copy_temp, 0b0001),
|
||||
|
@ -863,10 +880,12 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
// by the guest code, so initialize because assumptions can't be made
|
||||
// about the integrity of the guest code.
|
||||
system_temp_depth_stencil_ =
|
||||
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
|
||||
PushSystemTemp(current_shader().writes_depth() ? 0b0001 : 0b1111);
|
||||
}
|
||||
uint32_t shader_writes_color_targets =
|
||||
current_shader().writes_color_targets();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (writes_color_target(i)) {
|
||||
if (shader_writes_color_targets & (1 << i)) {
|
||||
system_temps_color_[i] = PushSystemTemp(0b1111);
|
||||
}
|
||||
}
|
||||
|
@ -879,8 +898,8 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
std::memset(system_temps_memexport_data_, 0xFF,
|
||||
sizeof(system_temps_memexport_data_));
|
||||
system_temp_memexport_written_ = UINT32_MAX;
|
||||
const uint8_t* memexports_written = memexport_eM_written();
|
||||
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
|
||||
const uint8_t* memexports_written = current_shader().memexport_eM_written();
|
||||
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
|
||||
uint32_t memexport_alloc_written = memexports_written[i];
|
||||
if (memexport_alloc_written == 0) {
|
||||
continue;
|
||||
|
@ -915,8 +934,9 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
// references them after only initializing them conditionally.
|
||||
for (uint32_t i = is_pixel_shader() ? xenos::kMaxInterpolators : 0;
|
||||
i < register_count(); ++i) {
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcOpMov(current_shader().uses_register_dynamic_addressing()
|
||||
? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
DxbcSrc::LF(0.0f));
|
||||
}
|
||||
}
|
||||
|
@ -1120,7 +1140,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
ExportToMemory();
|
||||
|
||||
// Release memexport temporary registers.
|
||||
for (int i = kMaxMemExports - 1; i >= 0; --i) {
|
||||
for (int i = Shader::kMaxMemExports - 1; i >= 0; --i) {
|
||||
if (system_temps_memexport_address_[i] == UINT32_MAX) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1154,8 +1174,10 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
PopSystemTemp(2);
|
||||
} else if (is_pixel_shader()) {
|
||||
// Release system_temps_color_.
|
||||
uint32_t shader_writes_color_targets =
|
||||
current_shader().writes_color_targets();
|
||||
for (int32_t i = 3; i >= 0; --i) {
|
||||
if (writes_color_target(i)) {
|
||||
if (shader_writes_color_targets & (1 << i)) {
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
|
@ -1274,40 +1296,42 @@ std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {
|
|||
return shader_object_bytes;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::PostTranslation(
|
||||
Shader::Translation& translation, bool setup_shader_post_translation_info) {
|
||||
if (setup_shader_post_translation_info) {
|
||||
DxbcShader* dxbc_shader = dynamic_cast<DxbcShader*>(&translation.shader());
|
||||
if (dxbc_shader) {
|
||||
dxbc_shader->texture_bindings_.clear();
|
||||
dxbc_shader->texture_bindings_.reserve(texture_bindings_.size());
|
||||
dxbc_shader->used_texture_mask_ = 0;
|
||||
for (const TextureBinding& translator_binding : texture_bindings_) {
|
||||
DxbcShader::TextureBinding& shader_binding =
|
||||
dxbc_shader->texture_bindings_.emplace_back();
|
||||
// For a stable hash.
|
||||
std::memset(&shader_binding, 0, sizeof(shader_binding));
|
||||
shader_binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
shader_binding.fetch_constant = translator_binding.fetch_constant;
|
||||
shader_binding.dimension = translator_binding.dimension;
|
||||
shader_binding.is_signed = translator_binding.is_signed;
|
||||
dxbc_shader->used_texture_mask_ |= 1u
|
||||
<< translator_binding.fetch_constant;
|
||||
}
|
||||
dxbc_shader->sampler_bindings_.clear();
|
||||
dxbc_shader->sampler_bindings_.reserve(sampler_bindings_.size());
|
||||
for (const SamplerBinding& translator_binding : sampler_bindings_) {
|
||||
DxbcShader::SamplerBinding& shader_binding =
|
||||
dxbc_shader->sampler_bindings_.emplace_back();
|
||||
shader_binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
shader_binding.fetch_constant = translator_binding.fetch_constant;
|
||||
shader_binding.mag_filter = translator_binding.mag_filter;
|
||||
shader_binding.min_filter = translator_binding.min_filter;
|
||||
shader_binding.mip_filter = translator_binding.mip_filter;
|
||||
shader_binding.aniso_filter = translator_binding.aniso_filter;
|
||||
}
|
||||
void DxbcShaderTranslator::PostTranslation() {
|
||||
Shader::Translation& translation = current_translation();
|
||||
if (!translation.is_valid()) {
|
||||
return;
|
||||
}
|
||||
DxbcShader* dxbc_shader = dynamic_cast<DxbcShader*>(&translation.shader());
|
||||
if (dxbc_shader && !dxbc_shader->bindings_setup_entered_.test_and_set(
|
||||
std::memory_order_relaxed)) {
|
||||
dxbc_shader->texture_bindings_.clear();
|
||||
dxbc_shader->texture_bindings_.reserve(texture_bindings_.size());
|
||||
dxbc_shader->used_texture_mask_ = 0;
|
||||
for (const TextureBinding& translator_binding : texture_bindings_) {
|
||||
DxbcShader::TextureBinding& shader_binding =
|
||||
dxbc_shader->texture_bindings_.emplace_back();
|
||||
// For a stable hash.
|
||||
std::memset(&shader_binding, 0, sizeof(shader_binding));
|
||||
shader_binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
shader_binding.fetch_constant = translator_binding.fetch_constant;
|
||||
shader_binding.dimension = translator_binding.dimension;
|
||||
shader_binding.is_signed = translator_binding.is_signed;
|
||||
dxbc_shader->used_texture_mask_ |= 1u
|
||||
<< translator_binding.fetch_constant;
|
||||
}
|
||||
dxbc_shader->sampler_bindings_.clear();
|
||||
dxbc_shader->sampler_bindings_.reserve(sampler_bindings_.size());
|
||||
for (const SamplerBinding& translator_binding : sampler_bindings_) {
|
||||
DxbcShader::SamplerBinding& shader_binding =
|
||||
dxbc_shader->sampler_bindings_.emplace_back();
|
||||
shader_binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
shader_binding.fetch_constant = translator_binding.fetch_constant;
|
||||
shader_binding.mag_filter = translator_binding.mag_filter;
|
||||
shader_binding.min_filter = translator_binding.min_filter;
|
||||
shader_binding.mip_filter = translator_binding.mip_filter;
|
||||
shader_binding.aniso_filter = translator_binding.aniso_filter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1373,7 +1397,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
|
|||
DxbcSrc src(DxbcSrc::LF(0.0f));
|
||||
switch (operand.storage_source) {
|
||||
case InstructionStorageSource::kRegister: {
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (current_shader().uses_register_dynamic_addressing()) {
|
||||
// Load x#[#] to r# because x#[#] can be used only with mov.
|
||||
uint32_t temp = PushSystemTemp();
|
||||
temp_pushed_out = true;
|
||||
|
@ -1402,10 +1426,12 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
|
|||
if (cbuffer_index_float_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_float_constants_ = cbuffer_count_++;
|
||||
}
|
||||
const Shader::ConstantRegisterMap& constant_register_map =
|
||||
current_shader().constant_register_map();
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
uint32_t float_constant_index =
|
||||
constant_register_map().GetPackedFloatConstantIndex(
|
||||
constant_register_map.GetPackedFloatConstantIndex(
|
||||
operand.storage_index);
|
||||
assert_true(float_constant_index != UINT32_MAX);
|
||||
if (float_constant_index == UINT32_MAX) {
|
||||
|
@ -1413,7 +1439,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
|
|||
}
|
||||
index.index_ = float_constant_index;
|
||||
} else {
|
||||
assert_true(constant_register_map().float_dynamic_addressing);
|
||||
assert_true(constant_register_map.float_dynamic_addressing);
|
||||
}
|
||||
src = DxbcSrc::CB(cbuffer_index_float_constants_,
|
||||
uint32_t(CbufferRegister::kFloatConstants), index);
|
||||
|
@ -1453,7 +1479,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
case InstructionStorageTarget::kNone:
|
||||
return;
|
||||
case InstructionStorageTarget::kRegister:
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
if (current_shader().uses_register_dynamic_addressing()) {
|
||||
DxbcIndex register_index(result.storage_index);
|
||||
switch (result.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
|
@ -1488,7 +1514,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
case InstructionStorageTarget::kExportAddress:
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||
UINT32_MAX) {
|
||||
return;
|
||||
|
@ -1499,7 +1525,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
case InstructionStorageTarget::kExportData: {
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[result.storage_index] == UINT32_MAX) {
|
||||
return;
|
||||
|
@ -1519,7 +1545,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
} break;
|
||||
case InstructionStorageTarget::kColor:
|
||||
assert_not_zero(used_write_mask);
|
||||
assert_true(writes_color_target(result.storage_index));
|
||||
assert_true(current_shader().writes_color_target(result.storage_index));
|
||||
dest = DxbcDest::R(system_temps_color_[result.storage_index]);
|
||||
if (edram_rov_used_) {
|
||||
// For ROV output, mark that the color has been written to.
|
||||
|
@ -1539,7 +1565,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
// Writes X to scalar oDepth or to X of system_temp_depth_stencil_, no
|
||||
// additional swizzling needed.
|
||||
assert_true(used_write_mask == 0b0001);
|
||||
assert_true(writes_depth());
|
||||
assert_true(current_shader().writes_depth());
|
||||
if (IsDepthStencilSystemTempUsed()) {
|
||||
dest = DxbcDest::R(system_temp_depth_stencil_);
|
||||
} else {
|
||||
|
@ -2077,6 +2103,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
|
||||
uint32_t new_offset;
|
||||
|
||||
const Shader::ConstantRegisterMap& constant_register_map =
|
||||
current_shader().constant_register_map();
|
||||
|
||||
// ***************************************************************************
|
||||
// Header
|
||||
// ***************************************************************************
|
||||
|
@ -2162,7 +2191,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// Declaring a 0-sized array may not be safe, so write something valid
|
||||
// even if they aren't used.
|
||||
shader_object_.push_back(
|
||||
std::max(constant_register_map().float_count, uint32_t(1)));
|
||||
std::max(constant_register_map.float_count, uint32_t(1)));
|
||||
break;
|
||||
case RdefTypeIndex::kUint4DescriptorIndexArray:
|
||||
shader_object_.push_back(std::max(
|
||||
|
@ -2278,10 +2307,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// Float constants.
|
||||
uint32_t constant_offset_float = new_offset;
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
assert_not_zero(constant_register_map.float_count);
|
||||
shader_object_.push_back(constant_name_offset_float);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(constant_register_map().float_count * 4 *
|
||||
shader_object_.push_back(constant_register_map.float_count * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
|
||||
shader_object_.push_back(types_offset +
|
||||
|
@ -2405,11 +2434,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// No D3D_SHADER_CBUFFER_FLAGS.
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == cbuffer_index_float_constants_) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
assert_not_zero(constant_register_map.float_count);
|
||||
shader_object_.push_back(cbuffer_name_offset_float);
|
||||
shader_object_.push_back(1);
|
||||
shader_object_.push_back(constant_offset_float);
|
||||
shader_object_.push_back(constant_register_map().float_count * 4 *
|
||||
shader_object_.push_back(constant_register_map.float_count * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
|
||||
shader_object_.push_back(0);
|
||||
|
@ -3211,7 +3240,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
if (!edram_rov_used_) {
|
||||
// Color render targets (SV_Target#).
|
||||
size_t target_position = SIZE_MAX;
|
||||
if (writes_any_color_target()) {
|
||||
if (current_shader().writes_color_targets()) {
|
||||
target_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() + 4 * kParameterDwords);
|
||||
parameter_count += 4;
|
||||
|
@ -3233,7 +3262,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
Modification::DepthStencilMode depth_stencil_mode =
|
||||
GetDxbcShaderModification().depth_stencil_mode;
|
||||
size_t depth_position = SIZE_MAX;
|
||||
if (writes_depth() || DSV_IsWritingFloat24Depth()) {
|
||||
if (current_shader().writes_depth() || DSV_IsWritingFloat24Depth()) {
|
||||
depth_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() + kParameterDwords);
|
||||
++parameter_count;
|
||||
|
@ -3268,7 +3297,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
depth.semantic_name = semantic_offset;
|
||||
}
|
||||
const char* depth_semantic_name;
|
||||
if (!writes_depth() &&
|
||||
if (!current_shader().writes_depth() &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating) {
|
||||
depth_semantic_name = "SV_DepthLessEqual";
|
||||
|
@ -3361,7 +3390,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
if (is_pixel_shader() &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kEarlyHint &&
|
||||
!edram_rov_used_ && CanWriteZEarly()) {
|
||||
!edram_rov_used_ && current_shader().implicit_early_z_write_allowed()) {
|
||||
global_flags_opcode |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL;
|
||||
}
|
||||
shader_object_.push_back(global_flags_opcode);
|
||||
|
@ -3369,11 +3398,13 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// Constant buffers, from most frequently accessed to least frequently accessed
|
||||
// (the order is a hint to the driver according to the DXBC header).
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
const Shader::ConstantRegisterMap& constant_register_map =
|
||||
current_shader().constant_register_map();
|
||||
assert_not_zero(constant_register_map.float_count);
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
constant_register_map().float_dynamic_addressing
|
||||
constant_register_map.float_dynamic_addressing
|
||||
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
|
||||
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
|
@ -3382,7 +3413,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(cbuffer_index_float_constants_);
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
|
||||
shader_object_.push_back(constant_register_map().float_count);
|
||||
shader_object_.push_back(constant_register_map.float_count);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
|
@ -3715,6 +3746,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
++stat_.dcl_count;
|
||||
} else if (is_pixel_shader()) {
|
||||
bool is_writing_float24_depth = DSV_IsWritingFloat24Depth();
|
||||
bool shader_writes_depth = current_shader().writes_depth();
|
||||
// Interpolator input.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
uint32_t interpolator_count =
|
||||
|
@ -3766,7 +3798,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// applicable here) position is mandatory. However, with depth output, on
|
||||
// the guest, there's only one depth value for the whole pixel.
|
||||
D3D10_SB_INTERPOLATION_MODE position_interpolation_mode =
|
||||
is_writing_float24_depth && !writes_depth()
|
||||
is_writing_float24_depth && !shader_writes_depth
|
||||
? D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE
|
||||
: D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE;
|
||||
shader_object_.push_back(
|
||||
|
@ -3806,7 +3838,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
EncodeScalarOperand(D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, 0));
|
||||
++stat_.dcl_count;
|
||||
} else {
|
||||
if (writes_any_color_target()) {
|
||||
if (current_shader().writes_color_targets()) {
|
||||
// Color output.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
shader_object_.push_back(
|
||||
|
@ -3819,9 +3851,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
}
|
||||
// Depth output.
|
||||
if (is_writing_float24_depth || writes_depth()) {
|
||||
if (is_writing_float24_depth || shader_writes_depth) {
|
||||
D3D10_SB_OPERAND_TYPE depth_operand_type;
|
||||
if (!writes_depth() &&
|
||||
if (!shader_writes_depth &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating) {
|
||||
depth_operand_type = D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL;
|
||||
|
@ -3840,7 +3872,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// Temporary registers - guest general-purpose registers if not using dynamic
|
||||
// indexing and Xenia internal registers.
|
||||
stat_.temp_register_count = system_temp_count_max_;
|
||||
if (!is_depth_only_pixel_shader_ && !uses_register_dynamic_addressing()) {
|
||||
if (!is_depth_only_pixel_shader_ &&
|
||||
!current_shader().uses_register_dynamic_addressing()) {
|
||||
stat_.temp_register_count += register_count();
|
||||
}
|
||||
if (stat_.temp_register_count != 0) {
|
||||
|
@ -3851,7 +3884,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// General-purpose registers if using dynamic indexing (x0).
|
||||
if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) {
|
||||
if (!is_depth_only_pixel_shader_ &&
|
||||
current_shader().uses_register_dynamic_addressing()) {
|
||||
assert_true(register_count() != 0);
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
|
||||
|
|
|
@ -106,13 +106,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// If anything in this structure is changed in a way not compatible with
|
||||
// the previous layout, invalidate the pipeline storages by increasing this
|
||||
// version number (0xYYYYMMDD)!
|
||||
static constexpr uint32_t kVersion = 0x20201203;
|
||||
static constexpr uint32_t kVersion = 0x20201219;
|
||||
|
||||
enum class DepthStencilMode : uint32_t {
|
||||
kNoModifiers,
|
||||
// [earlydepthstencil] - enable if alpha test and alpha to coverage are
|
||||
// disabled; ignored if anything in the shader blocks early Z writing
|
||||
// (which is not known before translation, so this will be set anyway).
|
||||
// disabled; ignored if anything in the shader blocks early Z writing.
|
||||
kEarlyHint,
|
||||
// Converting the depth to the closest 32-bit float representable exactly
|
||||
// as a 20e4 float, to support invariance in cases when the guest
|
||||
|
@ -136,15 +135,17 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
|
||||
struct {
|
||||
// Both - dynamically indexable register count from SQ_PROGRAM_CNTL.
|
||||
uint32_t dynamic_addressable_register_count : 8;
|
||||
// VS - pipeline stage and input configuration.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type
|
||||
: Shader::kHostVertexShaderTypeBitCount;
|
||||
// PS, non-ROV - depth / stencil output mode.
|
||||
DepthStencilMode depth_stencil_mode : 2;
|
||||
};
|
||||
uint32_t value = 0;
|
||||
uint64_t value = 0;
|
||||
|
||||
Modification(uint32_t modification_value = 0) : value(modification_value) {}
|
||||
Modification(uint64_t modification_value = 0) : value(modification_value) {}
|
||||
};
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
|
@ -467,8 +468,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
float& clamp_alpha_high, uint32_t& keep_mask_low,
|
||||
uint32_t& keep_mask_high);
|
||||
|
||||
uint32_t GetDefaultModification(
|
||||
uint64_t GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
uint32_t dynamic_addressable_register_count,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const override;
|
||||
|
||||
|
@ -477,12 +479,13 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
std::vector<uint8_t> CreateDepthOnlyPixelShader();
|
||||
|
||||
protected:
|
||||
void Reset(xenos::ShaderType shader_type) override;
|
||||
void Reset() override;
|
||||
|
||||
uint32_t GetModificationRegisterCount() const override;
|
||||
|
||||
void StartTranslation() override;
|
||||
std::vector<uint8_t> CompleteTranslation() override;
|
||||
void PostTranslation(Shader::Translation& translation,
|
||||
bool setup_shader_post_translation_info) override;
|
||||
void PostTranslation() override;
|
||||
|
||||
void ProcessLabel(uint32_t cf_index) override;
|
||||
|
||||
|
@ -2184,7 +2187,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
}
|
||||
|
||||
Modification GetDxbcShaderModification() const {
|
||||
return Modification(modification());
|
||||
return Modification(current_translation().modification());
|
||||
}
|
||||
|
||||
bool IsDxbcVertexShader() const {
|
||||
|
@ -2227,9 +2230,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
bool IsDepthStencilSystemTempUsed() const {
|
||||
// See system_temp_depth_stencil_ documentation for explanation of cases.
|
||||
if (edram_rov_used_) {
|
||||
return writes_depth() || ROV_IsDepthStencilEarly();
|
||||
return current_shader().writes_depth() || ROV_IsDepthStencilEarly();
|
||||
}
|
||||
return writes_depth() && DSV_IsWritingFloat24Depth();
|
||||
return current_shader().writes_depth() && DSV_IsWritingFloat24Depth();
|
||||
}
|
||||
// Whether the current non-ROV pixel shader should convert the depth to 20e4.
|
||||
bool DSV_IsWritingFloat24Depth() const {
|
||||
|
@ -2246,8 +2249,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Whether it's possible and worth skipping running the translated shader for
|
||||
// 2x2 quads.
|
||||
bool ROV_IsDepthStencilEarly() const {
|
||||
return !is_depth_only_pixel_shader_ && !writes_depth() &&
|
||||
memexport_stream_constants().empty();
|
||||
return !is_depth_only_pixel_shader_ && !current_shader().writes_depth() &&
|
||||
current_shader().memexport_stream_constants().empty();
|
||||
}
|
||||
// Converts the depth value to 24-bit (storing the result in bits 0:23 and
|
||||
// zeros in 24:31, not creating room for stencil - since this may be involved
|
||||
|
@ -2467,7 +2470,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
|
||||
// Is currently writing the empty depth-only pixel shader, for
|
||||
// CompleteTranslation.
|
||||
bool is_depth_only_pixel_shader_;
|
||||
bool is_depth_only_pixel_shader_ = false;
|
||||
|
||||
// Data types used in constants buffers. Listed in dependency order.
|
||||
enum class RdefTypeIndex {
|
||||
|
@ -2604,9 +2607,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// 4 `alloc export`s per component.
|
||||
uint32_t system_temp_memexport_written_;
|
||||
// eA in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_address_[kMaxMemExports];
|
||||
uint32_t system_temps_memexport_address_[Shader::kMaxMemExports];
|
||||
// eM# in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
|
||||
uint32_t system_temps_memexport_data_[Shader::kMaxMemExports][5];
|
||||
|
||||
// Vector ALU or fetch result/scratch (since Xenos write masks can contain
|
||||
// swizzles).
|
||||
|
|
|
@ -136,7 +136,7 @@ void DxbcShaderTranslator::ExportToMemory() {
|
|||
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kXXXX));
|
||||
// control_temp.x is now free.
|
||||
|
||||
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
|
||||
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
|
||||
uint32_t eA_temp = system_temps_memexport_address_[i];
|
||||
if (eA_temp == UINT32_MAX) {
|
||||
// Export not used.
|
||||
|
|
|
@ -144,7 +144,7 @@ void DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
||||
bool color_targets_written = writes_any_color_target();
|
||||
bool any_color_targets_written = current_shader().writes_color_targets() != 0;
|
||||
|
||||
// ***************************************************************************
|
||||
// Get EDRAM offsets for the pixel:
|
||||
|
@ -272,7 +272,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0001),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX));
|
||||
if (color_targets_written) {
|
||||
if (any_color_targets_written) {
|
||||
// Write 32bpp color offset to system_temp_rov_params_.z.
|
||||
// system_temp_rov_params_.x = X sample 0 position within the depth tile
|
||||
// system_temp_rov_params_.y = row offset
|
||||
|
@ -303,8 +303,8 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// Release resolution_scale_log2_temp.
|
||||
PopSystemTemp();
|
||||
{
|
||||
DxbcDest offsets_dest(DxbcDest::R(system_temp_rov_params_,
|
||||
color_targets_written ? 0b0110 : 0b0010));
|
||||
DxbcDest offsets_dest(DxbcDest::R(
|
||||
system_temp_rov_params_, any_color_targets_written ? 0b0110 : 0b0010));
|
||||
// Scale the offsets by the resolution scale.
|
||||
// system_temp_rov_params_.y = scaled 32bpp depth/stencil first host pixel
|
||||
// address
|
||||
|
@ -329,7 +329,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// Close the resolution scale conditional.
|
||||
DxbcOpEndIf();
|
||||
|
||||
if (color_targets_written) {
|
||||
if (any_color_targets_written) {
|
||||
// Get the 64bpp color offset to system_temp_rov_params_.w.
|
||||
// TODO(Triang3l): Find some game that aliases 64bpp with 32bpp to emulate
|
||||
// the real layout.
|
||||
|
@ -388,8 +388,6 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
||||
bool depth_stencil_early = ROV_IsDepthStencilEarly();
|
||||
|
||||
uint32_t temp = PushSystemTemp();
|
||||
DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001));
|
||||
DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX));
|
||||
|
@ -413,6 +411,9 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// temp.x = free
|
||||
DxbcOpIf(true, temp_x_src);
|
||||
|
||||
bool depth_stencil_early = ROV_IsDepthStencilEarly();
|
||||
bool shader_writes_depth = current_shader().writes_depth();
|
||||
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// With early depth/stencil, depth/stencil writing may be deferred to the
|
||||
// end of the shader to prevent writing in case something (like alpha test,
|
||||
|
@ -427,7 +428,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
: temp_x_src);
|
||||
|
||||
if (!i) {
|
||||
if (writes_depth()) {
|
||||
if (shader_writes_depth) {
|
||||
// Clamp oDepth to the lower viewport depth bound (depth clamp happens
|
||||
// after the pixel shader in the pipeline, at least on Direct3D 11 and
|
||||
// Vulkan, thus applies to the shader's depth output too).
|
||||
|
@ -569,7 +570,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// temp.w = free
|
||||
DxbcOpIf(true, temp_w_src);
|
||||
|
||||
if (writes_depth()) {
|
||||
if (shader_writes_depth) {
|
||||
// Copy the 24-bit depth common to all samples to sample_depth_stencil.
|
||||
// temp.x = shader-generated 24-bit depth
|
||||
DxbcOpMov(sample_depth_stencil_dest,
|
||||
|
@ -1024,7 +1025,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = whether depth/stencil has been modified
|
||||
DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src);
|
||||
if (depth_stencil_early && !CanWriteZEarly()) {
|
||||
if (depth_stencil_early &&
|
||||
!current_shader().implicit_early_z_write_allowed()) {
|
||||
// Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always
|
||||
// need to write late in this shader, as it may do something like
|
||||
// explicitly killing pixels.
|
||||
|
@ -1734,7 +1736,7 @@ void DxbcShaderTranslator::ROV_HandleAlphaBlendFactorCases(
|
|||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() {
|
||||
// Check if alpha to coverage can be done at all in this shader.
|
||||
if (!writes_color_target(0)) {
|
||||
if (!current_shader().writes_color_target(0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1863,21 +1865,22 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() {
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||
if (!writes_any_color_target()) {
|
||||
uint32_t shader_writes_color_targets =
|
||||
current_shader().writes_color_targets();
|
||||
if (!shader_writes_color_targets) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if this sample needs to be discarded by alpha to coverage.
|
||||
CompletePixelShader_WriteToRTVs_AlphaToMask();
|
||||
|
||||
// Get the write mask as components, and also apply the exponent bias after
|
||||
// alpha to coverage because it needs the unbiased alpha from the shader.
|
||||
uint32_t guest_rt_mask = 0;
|
||||
uint32_t gamma_temp = PushSystemTemp();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!writes_color_target(i)) {
|
||||
if (!(shader_writes_color_targets & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
guest_rt_mask |= 1 << i;
|
||||
// Apply the exponent bias after alpha to coverage because it needs the
|
||||
// unbiased alpha from the shader
|
||||
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
|
||||
DxbcOpMul(DxbcDest::R(system_temps_color_[i]),
|
||||
DxbcSrc::R(system_temps_color_[i]),
|
||||
|
@ -1885,16 +1888,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
|||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_ColorExpBias_Vec)
|
||||
.Select(i));
|
||||
}
|
||||
|
||||
// Convert to gamma space - this is incorrect, since it must be done after
|
||||
// blending on the Xbox 360, but this is just one of many blending issues in
|
||||
// the RTV path.
|
||||
uint32_t gamma_temp = PushSystemTemp();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(guest_rt_mask & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
// Convert to gamma space - this is incorrect, since it must be done after
|
||||
// blending on the Xbox 360, but this is just one of many blending issues in
|
||||
// the RTV path.
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
DxbcOpAnd(DxbcDest::R(gamma_temp, 0b0001),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
|
@ -1923,7 +1919,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
|||
// Host RT i, guest RT j.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// mask = map.iiii == (0, 1, 2, 3)
|
||||
DxbcOpIEq(DxbcDest::R(remap_movc_mask_temp, guest_rt_mask),
|
||||
DxbcOpIEq(DxbcDest::R(remap_movc_mask_temp, shader_writes_color_targets),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_ColorOutputMap_Vec)
|
||||
|
@ -1932,7 +1928,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
|||
bool guest_rt_first = true;
|
||||
for (uint32_t j = 0; j < 4; ++j) {
|
||||
// If map.i == j, move guest color j to the temporary host color.
|
||||
if (!(guest_rt_mask & (1 << j))) {
|
||||
if (!(shader_writes_color_targets & (1 << j))) {
|
||||
continue;
|
||||
}
|
||||
DxbcOpMovC(DxbcDest::R(remap_movc_target_temp),
|
||||
|
@ -1954,8 +1950,10 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
|
|||
return;
|
||||
}
|
||||
|
||||
bool shader_writes_depth = current_shader().writes_depth();
|
||||
|
||||
uint32_t temp;
|
||||
if (writes_depth()) {
|
||||
if (shader_writes_depth) {
|
||||
// The depth is already written to system_temp_depth_stencil_.x and clamped
|
||||
// to 0...1 with NaNs dropped (saturating in StoreResult); yzw are free.
|
||||
temp = system_temp_depth_stencil_;
|
||||
|
@ -1991,8 +1989,8 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
|
|||
// The smallest denormalized 20e4 number is -34 - should drop 23 mantissa
|
||||
// bits at -34.
|
||||
// Anything smaller than 2^-34 becomes 0.
|
||||
DxbcDest truncate_dest(writes_depth() ? DxbcDest::ODepth()
|
||||
: DxbcDest::ODepthLE());
|
||||
DxbcDest truncate_dest(shader_writes_depth ? DxbcDest::ODepth()
|
||||
: DxbcDest::ODepthLE());
|
||||
// Check if the number is representable as a float24 after truncation - the
|
||||
// exponent is at least -34.
|
||||
DxbcOpUGE(temp_y_dest, temp_x_src, DxbcSrc::LU(0x2E800000));
|
||||
|
@ -2076,7 +2074,7 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
|
|||
temp_y_src);
|
||||
}
|
||||
|
||||
if (!writes_depth()) {
|
||||
if (!shader_writes_depth) {
|
||||
// Release temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
|
@ -2106,7 +2104,7 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMaskSample(
|
|||
|
||||
void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() {
|
||||
// Check if alpha to coverage can be done at all in this shader.
|
||||
if (!writes_color_target(0)) {
|
||||
if (!current_shader().writes_color_target(0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2269,8 +2267,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
}
|
||||
|
||||
// Write color values.
|
||||
uint32_t shader_writes_color_targets =
|
||||
current_shader().writes_color_targets();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!writes_color_target(i)) {
|
||||
if (!(shader_writes_color_targets & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -3156,7 +3156,7 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
|||
return;
|
||||
}
|
||||
|
||||
if (writes_color_target(0)) {
|
||||
if (current_shader().writes_color_target(0)) {
|
||||
// Alpha test.
|
||||
// X - mask, then masked result (SGPR for loading, VGPR for masking).
|
||||
// Y - operation result (SGPR for mask operations, VGPR for alpha
|
||||
|
|
|
@ -97,6 +97,7 @@ union SQ_PROGRAM_CNTL {
|
|||
// Note from a2xx.xml:
|
||||
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
|
||||
// but high bit is set to indicate "0 registers used".
|
||||
// (Register count = (num_reg & 0x80) ? 0 : (num_reg + 1))
|
||||
uint32_t vs_num_reg : 8; // +0
|
||||
uint32_t ps_num_reg : 8; // +8
|
||||
uint32_t vs_resource : 1; // +16
|
||||
|
|
|
@ -55,7 +55,7 @@ std::filesystem::path Shader::Translation::Dump(
|
|||
}
|
||||
path = path /
|
||||
fmt::format(
|
||||
"shader_{:016X}_{:08X}.{}.{}", shader().ucode_data_hash(),
|
||||
"shader_{:016X}_{:016X}.{}.{}", shader().ucode_data_hash(),
|
||||
modification(), path_prefix,
|
||||
shader().type() == xenos::ShaderType::kVertex ? "vert" : "frag");
|
||||
FILE* f = filesystem::OpenFile(path, "wb");
|
||||
|
@ -78,7 +78,7 @@ std::filesystem::path Shader::Translation::Dump(
|
|||
return std::move(path);
|
||||
}
|
||||
|
||||
Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification,
|
||||
Shader::Translation* Shader::GetOrCreateTranslation(uint64_t modification,
|
||||
bool* is_new) {
|
||||
auto it = translations_.find(modification);
|
||||
if (it != translations_.end()) {
|
||||
|
@ -95,7 +95,7 @@ Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification,
|
|||
return translation;
|
||||
}
|
||||
|
||||
void Shader::DestroyTranslation(uint32_t modification) {
|
||||
void Shader::DestroyTranslation(uint64_t modification) {
|
||||
auto it = translations_.find(modification);
|
||||
if (it == translations_.end()) {
|
||||
return;
|
||||
|
@ -124,7 +124,7 @@ std::filesystem::path Shader::DumpUcodeBinary(
|
|||
return std::move(path);
|
||||
}
|
||||
|
||||
Shader::Translation* Shader::CreateTranslationInstance(uint32_t modification) {
|
||||
Shader::Translation* Shader::CreateTranslationInstance(uint64_t modification) {
|
||||
// Default implementation for simple cases like ucode disassembly.
|
||||
return new Translation(*this, modification);
|
||||
}
|
||||
|
|
|
@ -11,9 +11,9 @@
|
|||
#define XENIA_GPU_SHADER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
@ -593,6 +593,41 @@ struct ParsedAluInstruction {
|
|||
void Disassemble(StringBuffer* out) const;
|
||||
};
|
||||
|
||||
void ParseControlFlowExec(const ucode::ControlFlowExecInstruction& cf,
|
||||
uint32_t cf_index, ParsedExecInstruction& instr);
|
||||
void ParseControlFlowCondExec(const ucode::ControlFlowCondExecInstruction& cf,
|
||||
uint32_t cf_index, ParsedExecInstruction& instr);
|
||||
void ParseControlFlowCondExecPred(
|
||||
const ucode::ControlFlowCondExecPredInstruction& cf, uint32_t cf_index,
|
||||
ParsedExecInstruction& instr);
|
||||
void ParseControlFlowLoopStart(const ucode::ControlFlowLoopStartInstruction& cf,
|
||||
uint32_t cf_index,
|
||||
ParsedLoopStartInstruction& instr);
|
||||
void ParseControlFlowLoopEnd(const ucode::ControlFlowLoopEndInstruction& cf,
|
||||
uint32_t cf_index,
|
||||
ParsedLoopEndInstruction& instr);
|
||||
void ParseControlFlowCondCall(const ucode::ControlFlowCondCallInstruction& cf,
|
||||
uint32_t cf_index, ParsedCallInstruction& instr);
|
||||
void ParseControlFlowReturn(const ucode::ControlFlowReturnInstruction& cf,
|
||||
uint32_t cf_index, ParsedReturnInstruction& instr);
|
||||
void ParseControlFlowCondJmp(const ucode::ControlFlowCondJmpInstruction& cf,
|
||||
uint32_t cf_index, ParsedJumpInstruction& instr);
|
||||
void ParseControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf,
|
||||
uint32_t cf_index, bool is_vertex_shader,
|
||||
ParsedAllocInstruction& instr);
|
||||
|
||||
// Returns whether the fetch is a full one, and the next parsed mini vertex
|
||||
// fetch should inherit most of its parameters.
|
||||
bool ParseVertexFetchInstruction(
|
||||
const ucode::VertexFetchInstruction& op,
|
||||
const ucode::VertexFetchInstruction& previous_full_op,
|
||||
ParsedVertexFetchInstruction& instr);
|
||||
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
|
||||
ParsedTextureFetchInstruction& instr);
|
||||
void ParseAluInstruction(const ucode::AluInstruction& op,
|
||||
xenos::ShaderType shader_type,
|
||||
ParsedAluInstruction& instr);
|
||||
|
||||
class Shader {
|
||||
public:
|
||||
// Type of the vertex shader in a D3D11-like rendering pipeline - shader
|
||||
|
@ -619,12 +654,8 @@ class Shader {
|
|||
|
||||
struct VertexBinding {
|
||||
struct Attribute {
|
||||
// Attribute index, 0-based in the entire shader.
|
||||
int attrib_index;
|
||||
// Fetch instruction with all parameters.
|
||||
ParsedVertexFetchInstruction fetch_instr;
|
||||
// Size of the attribute, in words.
|
||||
uint32_t size_words;
|
||||
};
|
||||
|
||||
// Index within the vertex binding listing.
|
||||
|
@ -691,6 +722,10 @@ class Shader {
|
|||
}
|
||||
};
|
||||
|
||||
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
|
||||
// .pdb.
|
||||
static constexpr uint32_t kMaxMemExports = 16;
|
||||
|
||||
class Translation {
|
||||
public:
|
||||
virtual ~Translation() {}
|
||||
|
@ -698,7 +733,7 @@ class Shader {
|
|||
Shader& shader() const { return shader_; }
|
||||
|
||||
// Translator-specific modification bits.
|
||||
uint32_t modification() const { return modification_; }
|
||||
uint64_t modification() const { return modification_; }
|
||||
|
||||
// True if the shader was translated and prepared without error.
|
||||
bool is_valid() const { return is_valid_; }
|
||||
|
@ -735,7 +770,7 @@ class Shader {
|
|||
const char* path_prefix);
|
||||
|
||||
protected:
|
||||
Translation(Shader& shader, uint32_t modification)
|
||||
Translation(Shader& shader, uint64_t modification)
|
||||
: shader_(shader), modification_(modification) {}
|
||||
|
||||
private:
|
||||
|
@ -743,7 +778,7 @@ class Shader {
|
|||
friend class ShaderTranslator;
|
||||
|
||||
Shader& shader_;
|
||||
uint32_t modification_;
|
||||
uint64_t modification_;
|
||||
|
||||
bool is_valid_ = false;
|
||||
bool is_translated_ = false;
|
||||
|
@ -765,32 +800,23 @@ class Shader {
|
|||
const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
|
||||
size_t ucode_dword_count() const { return ucode_data_.size(); }
|
||||
|
||||
// Host translations with the specified modification bits. Not thread-safe
|
||||
// with respect to translation creation/destruction.
|
||||
const std::unordered_map<uint32_t, Translation*>& translations() const {
|
||||
return translations_;
|
||||
}
|
||||
Translation* GetTranslation(uint32_t modification) const {
|
||||
auto it = translations_.find(modification);
|
||||
if (it != translations_.cend()) {
|
||||
return it->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
Translation* GetOrCreateTranslation(uint32_t modification,
|
||||
bool* is_new = nullptr);
|
||||
// For shader storage loading, to remove a modification in case of translation
|
||||
// failure. Not thread-safe.
|
||||
void DestroyTranslation(uint32_t modification);
|
||||
bool is_ucode_analyzed() const { return is_ucode_analyzed_; }
|
||||
// ucode_disasm_buffer is temporary storage for disassembly (provided
|
||||
// externally so it won't need to be reallocated for every shader).
|
||||
void AnalyzeUcode(StringBuffer& ucode_disasm_buffer);
|
||||
|
||||
// The following parameters, until the translation, are valid if ucode
|
||||
// information has been gathered.
|
||||
|
||||
// Microcode disassembly in D3D format.
|
||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||
|
||||
// All vertex bindings used in the shader.
|
||||
// Valid for vertex shaders only.
|
||||
const std::vector<VertexBinding>& vertex_bindings() const {
|
||||
return vertex_bindings_;
|
||||
}
|
||||
|
||||
// All texture bindings used in the shader.
|
||||
// Valid for both vertex and pixel shaders.
|
||||
const std::vector<TextureBinding>& texture_bindings() const {
|
||||
return texture_bindings_;
|
||||
}
|
||||
|
@ -800,24 +826,99 @@ class Shader {
|
|||
return constant_register_map_;
|
||||
}
|
||||
|
||||
// uint5[Shader::kMaxMemExports] - bits indicating which eM# registers have
|
||||
// been written to after each `alloc export`, for up to Shader::kMaxMemExports
|
||||
// exports. This will contain zero for certain corrupt exports - for those to
|
||||
// which a valid eA was not written via a MAD with a stream constant.
|
||||
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
|
||||
|
||||
// All c# registers used as the addend in MAD operations to eA.
|
||||
const std::vector<uint32_t>& memexport_stream_constants() const {
|
||||
const std::set<uint32_t>& memexport_stream_constants() const {
|
||||
return memexport_stream_constants_;
|
||||
}
|
||||
|
||||
// Returns true if the given color target index [0-3] is written to.
|
||||
bool writes_color_target(uint32_t i) const {
|
||||
return writes_color_targets_[i];
|
||||
// Labels that jumps (explicit or from loops) can be done to.
|
||||
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
|
||||
|
||||
// Exclusive upper bound of the indexes of paired control flow instructions
|
||||
// (each corresponds to 3 dwords).
|
||||
uint32_t cf_pair_index_bound() const { return cf_pair_index_bound_; }
|
||||
|
||||
// Upper bound of temporary registers addressed statically by the shader -
|
||||
// highest static register address + 1, or 0 if no registers referenced this
|
||||
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
|
||||
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
|
||||
// can be reproduced by launching Arrival in Halo 3 from the campaign lobby)
|
||||
// that aren't supposed to cover any pixels use an invalid (zero)
|
||||
// SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this case
|
||||
// SQ_PROGRAM_CNTL may contain a number smaller than actually needed by the
|
||||
// pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
|
||||
// uses_register_dynamic_addressing is true.
|
||||
uint32_t register_static_address_bound() const {
|
||||
return register_static_address_bound_;
|
||||
}
|
||||
|
||||
// True if the shader overrides the pixel depth.
|
||||
bool writes_depth() const { return writes_depth_; }
|
||||
// Whether the shader addresses temporary registers dynamically, thus
|
||||
// SQ_PROGRAM_CNTL should determine the number of registers to use, not only
|
||||
// register_static_address_bound.
|
||||
bool uses_register_dynamic_addressing() const {
|
||||
return uses_register_dynamic_addressing_;
|
||||
}
|
||||
|
||||
// For building shader modification bits (and also for normalization of them),
|
||||
// returns the amount of temporary registers that need to be allocated
|
||||
// explicitly - if not using register dynamic addressing, the shader
|
||||
// translator will use register_static_address_bound directly.
|
||||
uint32_t GetDynamicAddressableRegisterCount(
|
||||
uint32_t program_cntl_num_reg) const {
|
||||
if (!uses_register_dynamic_addressing()) {
|
||||
return 0;
|
||||
}
|
||||
return std::max((program_cntl_num_reg & 0x80)
|
||||
? uint32_t(0)
|
||||
: (program_cntl_num_reg + uint32_t(1)),
|
||||
register_static_address_bound());
|
||||
}
|
||||
|
||||
// True if the current shader has any `kill` instructions.
|
||||
bool kills_pixels() const { return kills_pixels_; }
|
||||
|
||||
// Microcode disassembly in D3D format.
|
||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||
// True if the shader overrides the pixel depth.
|
||||
bool writes_depth() const { return writes_depth_; }
|
||||
|
||||
// Whether the shader can have early depth and stencil writing enabled, unless
|
||||
// alpha test or alpha to coverage is enabled.
|
||||
bool implicit_early_z_write_allowed() const {
|
||||
// TODO(Triang3l): Investigate what happens to memexport when the pixel
|
||||
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
|
||||
// depth/stencil.
|
||||
return !writes_depth() && !kills_pixels() &&
|
||||
memexport_stream_constants().empty();
|
||||
}
|
||||
|
||||
// Whether each color render target is written to on any execution path.
|
||||
uint32_t writes_color_targets() const { return writes_color_targets_; }
|
||||
bool writes_color_target(uint32_t i) const {
|
||||
return (writes_color_targets() & (uint32_t(1) << i)) != 0;
|
||||
}
|
||||
|
||||
// Host translations with the specified modification bits. Not thread-safe
|
||||
// with respect to translation creation/destruction.
|
||||
const std::unordered_map<uint64_t, Translation*>& translations() const {
|
||||
return translations_;
|
||||
}
|
||||
Translation* GetTranslation(uint64_t modification) const {
|
||||
auto it = translations_.find(modification);
|
||||
if (it != translations_.cend()) {
|
||||
return it->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
Translation* GetOrCreateTranslation(uint64_t modification,
|
||||
bool* is_new = nullptr);
|
||||
// For shader storage loading, to remove a modification in case of translation
|
||||
// failure. Not thread-safe.
|
||||
void DestroyTranslation(uint64_t modification);
|
||||
|
||||
// An externally managed identifier of the shader storage the microcode of the
|
||||
// shader was last written to, or was loaded from, to only write the shader
|
||||
|
@ -835,33 +936,68 @@ class Shader {
|
|||
protected:
|
||||
friend class ShaderTranslator;
|
||||
|
||||
virtual Translation* CreateTranslationInstance(uint32_t modification);
|
||||
virtual Translation* CreateTranslationInstance(uint64_t modification);
|
||||
|
||||
xenos::ShaderType shader_type_;
|
||||
std::vector<uint32_t> ucode_data_;
|
||||
uint64_t ucode_data_hash_;
|
||||
|
||||
// Modification bits -> translation.
|
||||
std::unordered_map<uint32_t, Translation*> translations_;
|
||||
// Whether info needed before translating has been gathered already - may be
|
||||
// needed to determine which modifications are actually needed and make sense
|
||||
// (for instance, there may be draws not covering anything and not allocating
|
||||
// any pixel shader registers in SQ_PROGRAM_CNTL, but still using the pixel
|
||||
// shader from the previous draw - in this case, every shader that happens to
|
||||
// be before such draw will need to be translated again with a different
|
||||
// dynamically addressed register count, which may cause compilation of
|
||||
// different random pipelines across many random frames, thus causing
|
||||
// stuttering - normally host pipeline states are deterministically only
|
||||
// compiled when a new material appears in the game, and having the order of
|
||||
// draws also matter in such unpredictable way would break this rule; limit
|
||||
// the effect to shaders with dynamic register addressing only, which are
|
||||
// extremely rare), also some info needed for drawing is collected during the
|
||||
// ucode analysis.
|
||||
bool is_ucode_analyzed_ = false;
|
||||
|
||||
// Whether setup of the post-translation parameters (listed below, plus those
|
||||
// specific to the implementation) has been initiated, by any thread. If
|
||||
// translation is performed on multiple threads, only one thread must be
|
||||
// setting this up (other threads would write the same data anyway).
|
||||
std::atomic_flag post_translation_info_set_up_ = ATOMIC_FLAG_INIT;
|
||||
|
||||
// Initialized after the first successful translation (these don't depend on
|
||||
// the host-side modification bits).
|
||||
std::string ucode_disassembly_;
|
||||
std::vector<VertexBinding> vertex_bindings_;
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
ConstantRegisterMap constant_register_map_ = {0};
|
||||
bool writes_color_targets_[4] = {false, false, false, false};
|
||||
bool writes_depth_ = false;
|
||||
uint8_t memexport_eM_written_[kMaxMemExports] = {};
|
||||
std::set<uint32_t> memexport_stream_constants_;
|
||||
std::set<uint32_t> label_addresses_;
|
||||
uint32_t cf_pair_index_bound_ = 0;
|
||||
uint32_t register_static_address_bound_ = 0;
|
||||
bool uses_register_dynamic_addressing_ = false;
|
||||
bool kills_pixels_ = false;
|
||||
std::vector<uint32_t> memexport_stream_constants_;
|
||||
bool writes_depth_ = false;
|
||||
uint32_t writes_color_targets_ = 0b0000;
|
||||
|
||||
// Modification bits -> translation.
|
||||
std::unordered_map<uint64_t, Translation*> translations_;
|
||||
|
||||
uint32_t ucode_storage_index_ = UINT32_MAX;
|
||||
|
||||
private:
|
||||
void GatherExecInformation(
|
||||
const ParsedExecInstruction& instr,
|
||||
ucode::VertexFetchInstruction& previous_vfetch_full,
|
||||
uint32_t& unique_texture_bindings, uint32_t memexport_alloc_current_count,
|
||||
uint32_t& memexport_eA_written, StringBuffer& ucode_disasm_buffer);
|
||||
void GatherVertexFetchInformation(
|
||||
const ucode::VertexFetchInstruction& op,
|
||||
ucode::VertexFetchInstruction& previous_vfetch_full,
|
||||
StringBuffer& ucode_disasm_buffer);
|
||||
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op,
|
||||
uint32_t& unique_texture_bindings,
|
||||
StringBuffer& ucode_disasm_buffer);
|
||||
void GatherAluInstructionInformation(const ucode::AluInstruction& op,
|
||||
uint32_t memexport_alloc_current_count,
|
||||
uint32_t& memexport_eA_written,
|
||||
StringBuffer& ucode_disasm_buffer);
|
||||
void GatherOperandInformation(const InstructionOperand& operand);
|
||||
void GatherFetchResultInformation(const InstructionResult& result);
|
||||
void GatherAluResultInformation(const InstructionResult& result,
|
||||
uint32_t memexport_alloc_current_count);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "xenia/base/main.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/shader_translator.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
@ -104,6 +105,8 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
auto shader = std::make_unique<Shader>(
|
||||
shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size());
|
||||
|
||||
shader->AnalyzeUcode(StringBuffer());
|
||||
|
||||
std::unique_ptr<ShaderTranslator> translator;
|
||||
if (cvars::shader_output_type == "spirv" ||
|
||||
cvars::shader_output_type == "spirvtext") {
|
||||
|
@ -114,7 +117,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
0, cvars::shader_output_bindless_resources,
|
||||
cvars::shader_output_dxbc_rov);
|
||||
} else {
|
||||
translator = std::make_unique<UcodeShaderTranslator>();
|
||||
// Just output microcode disassembly generated during microcode information
|
||||
// gathering.
|
||||
if (!cvars::shader_output.empty()) {
|
||||
auto output_file = filesystem::OpenFile(cvars::shader_output, "wb");
|
||||
fwrite(shader->ucode_disassembly().c_str(), 1,
|
||||
shader->ucode_disassembly().length(), output_file);
|
||||
fclose(output_file);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
|
@ -140,12 +151,12 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
|
||||
}
|
||||
}
|
||||
uint32_t modification =
|
||||
translator->GetDefaultModification(shader_type, host_vertex_shader_type);
|
||||
uint64_t modification = translator->GetDefaultModification(
|
||||
shader_type, 64, host_vertex_shader_type);
|
||||
|
||||
Shader::Translation* translation =
|
||||
shader->GetOrCreateTranslation(modification);
|
||||
translator->Translate(*translation);
|
||||
translator->TranslateAnalyzedShader(*translation);
|
||||
|
||||
const void* source_data = translation->translated_binary().data();
|
||||
size_t source_data_size = translation->translated_binary().size();
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -29,106 +29,43 @@ class ShaderTranslator {
|
|||
public:
|
||||
virtual ~ShaderTranslator();
|
||||
|
||||
virtual uint32_t GetDefaultModification(
|
||||
virtual uint64_t GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
uint32_t dynamic_addressable_register_count,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Translate(Shader::Translation& translation, reg::SQ_PROGRAM_CNTL cntl);
|
||||
bool Translate(Shader::Translation& translation);
|
||||
// AnalyzeUcode must be done on the shader before translating!
|
||||
bool TranslateAnalyzedShader(Shader::Translation& translation);
|
||||
|
||||
protected:
|
||||
ShaderTranslator();
|
||||
|
||||
// Resets translator state before beginning translation.
|
||||
// shader_type is passed here so translator implementations can generate
|
||||
// special fixed shaders for internal use, and set up the type for this
|
||||
// purpose.
|
||||
virtual void Reset(xenos::ShaderType shader_type);
|
||||
virtual void Reset();
|
||||
|
||||
// Current host-side modification being generated.
|
||||
uint32_t modification() const { return modification_; }
|
||||
// Shader and modification currently being translated.
|
||||
Shader::Translation& current_translation() const { return *translation_; }
|
||||
Shader& current_shader() const { return current_translation().shader(); }
|
||||
|
||||
// Register count from SQ_PROGRAM_CNTL, stored by the implementation in its
|
||||
// modification bits.
|
||||
virtual uint32_t GetModificationRegisterCount() const { return 64; }
|
||||
|
||||
// Register count.
|
||||
uint32_t register_count() const { return register_count_; }
|
||||
// True if the current shader is a vertex shader.
|
||||
bool is_vertex_shader() const {
|
||||
return shader_type_ == xenos::ShaderType::kVertex;
|
||||
return current_shader().type() == xenos::ShaderType::kVertex;
|
||||
}
|
||||
// True if the current shader is a pixel shader.
|
||||
bool is_pixel_shader() const {
|
||||
return shader_type_ == xenos::ShaderType::kPixel;
|
||||
}
|
||||
// Labels that jumps (explicit or from loops) can be done to, gathered before
|
||||
// translation.
|
||||
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
|
||||
// Used constant register info, populated before translation.
|
||||
const Shader::ConstantRegisterMap& constant_register_map() const {
|
||||
return constant_register_map_;
|
||||
}
|
||||
// True if the current shader addresses general-purpose registers with dynamic
|
||||
// indices, set before translation. Doesn't include writes to r[#+a#] with an
|
||||
// empty used write mask.
|
||||
bool uses_register_dynamic_addressing() const {
|
||||
return uses_register_dynamic_addressing_;
|
||||
}
|
||||
// True if the current shader writes to a color target on any execution path,
|
||||
// set before translation. Doesn't include writes with an empty used write
|
||||
// mask.
|
||||
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
||||
bool writes_any_color_target() const {
|
||||
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||
if (writes_color_targets_[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// True if the current shader overrides the pixel depth, set before
|
||||
// translation. Doesn't include writes with an empty used write mask.
|
||||
bool writes_depth() const { return writes_depth_; }
|
||||
// True if the current shader has any `kill` instructions.
|
||||
bool kills_pixels() const { return kills_pixels_; }
|
||||
// A list of all vertex bindings, populated before translation occurs.
|
||||
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
||||
return vertex_bindings_;
|
||||
}
|
||||
// A list of all texture bindings, populated before translation occurs.
|
||||
const std::vector<Shader::TextureBinding>& texture_bindings() const {
|
||||
return texture_bindings_;
|
||||
return current_shader().type() == xenos::ShaderType::kPixel;
|
||||
}
|
||||
|
||||
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
|
||||
// .pdb.
|
||||
static constexpr uint32_t kMaxMemExports = 16;
|
||||
// Bits indicating which eM# registers have been written to after each
|
||||
// `alloc export`, for up to kMaxMemExports exports. This will contain zero
|
||||
// for certain corrupt exports - that don't write to eA before writing to eM#,
|
||||
// or if the write was done any way other than MAD with a stream constant.
|
||||
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
|
||||
// All c# registers used as the addend in MAD operations to eA, populated
|
||||
// before translation occurs.
|
||||
const std::set<uint32_t>& memexport_stream_constants() const {
|
||||
return memexport_stream_constants_;
|
||||
}
|
||||
// Temporary register count, accessible via static and dynamic addressing.
|
||||
uint32_t register_count() const { return register_count_; }
|
||||
|
||||
// Whether the shader can have early depth and stencil writing enabled, unless
|
||||
// alpha test or alpha to coverage is enabled. Data gathered before
|
||||
// translation.
|
||||
bool CanWriteZEarly() const {
|
||||
// TODO(Triang3l): Investigate what happens to memexport when the pixel
|
||||
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
|
||||
// depth/stencil.
|
||||
return !writes_depth_ && !kills_pixels_ &&
|
||||
memexport_stream_constants_.empty();
|
||||
}
|
||||
|
||||
// Current line number in the ucode disassembly.
|
||||
size_t ucode_disasm_line_number() const { return ucode_disasm_line_number_; }
|
||||
// Ucode disassembly buffer accumulated during translation.
|
||||
StringBuffer& ucode_disasm_buffer() { return ucode_disasm_buffer_; }
|
||||
// Emits a translation error that will be passed back in the result.
|
||||
virtual void EmitTranslationError(const char* message, bool is_fatal = true);
|
||||
|
||||
|
@ -143,10 +80,7 @@ class ShaderTranslator {
|
|||
}
|
||||
|
||||
// Handles post-translation tasks when the shader has been fully translated.
|
||||
// setup_shader_post_translation_info if non-modification-specific parameters
|
||||
// of the Shader object behind the Translation can be set by this invocation.
|
||||
virtual void PostTranslation(Shader::Translation& translation,
|
||||
bool setup_shader_post_translation_info) {}
|
||||
virtual void PostTranslation() {}
|
||||
// Sets the host disassembly on a shader.
|
||||
void set_host_disassembly(Shader::Translation& translation,
|
||||
std::string value) {
|
||||
|
@ -201,130 +135,23 @@ class ShaderTranslator {
|
|||
virtual void ProcessAluInstruction(const ParsedAluInstruction& instr) {}
|
||||
|
||||
private:
|
||||
struct AluOpcodeInfo {
|
||||
const char* name;
|
||||
uint32_t argument_count;
|
||||
uint32_t src_swizzle_component_count;
|
||||
};
|
||||
|
||||
bool TranslateInternal(Shader::Translation& translation);
|
||||
|
||||
void MarkUcodeInstruction(uint32_t dword_offset);
|
||||
void AppendUcodeDisasm(char c);
|
||||
void AppendUcodeDisasm(const char* value);
|
||||
void AppendUcodeDisasmFormat(const char* format, ...);
|
||||
|
||||
void GatherInstructionInformation(const ucode::ControlFlowInstruction& cf);
|
||||
void GatherVertexFetchInformation(const ucode::VertexFetchInstruction& op);
|
||||
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op);
|
||||
void TranslateControlFlowInstruction(const ucode::ControlFlowInstruction& cf);
|
||||
void TranslateControlFlowNop(const ucode::ControlFlowInstruction& cf);
|
||||
void TranslateControlFlowExec(const ucode::ControlFlowExecInstruction& cf);
|
||||
void TranslateControlFlowCondExec(
|
||||
const ucode::ControlFlowCondExecInstruction& cf);
|
||||
void TranslateControlFlowCondExecPred(
|
||||
const ucode::ControlFlowCondExecPredInstruction& cf);
|
||||
void TranslateControlFlowLoopStart(
|
||||
const ucode::ControlFlowLoopStartInstruction& cf);
|
||||
void TranslateControlFlowLoopEnd(
|
||||
const ucode::ControlFlowLoopEndInstruction& cf);
|
||||
void TranslateControlFlowCondCall(
|
||||
const ucode::ControlFlowCondCallInstruction& cf);
|
||||
void TranslateControlFlowReturn(
|
||||
const ucode::ControlFlowReturnInstruction& cf);
|
||||
void TranslateControlFlowCondJmp(
|
||||
const ucode::ControlFlowCondJmpInstruction& cf);
|
||||
void TranslateControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf);
|
||||
|
||||
void TranslateExecInstructions(const ParsedExecInstruction& instr);
|
||||
|
||||
void TranslateVertexFetchInstruction(const ucode::VertexFetchInstruction& op);
|
||||
void ParseVertexFetchInstruction(const ucode::VertexFetchInstruction& op,
|
||||
ParsedVertexFetchInstruction* out_instr);
|
||||
|
||||
void TranslateTextureFetchInstruction(
|
||||
const ucode::TextureFetchInstruction& op);
|
||||
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
|
||||
ParsedTextureFetchInstruction* out_instr);
|
||||
|
||||
void TranslateAluInstruction(const ucode::AluInstruction& op);
|
||||
void ParseAluInstruction(const ucode::AluInstruction& op,
|
||||
ParsedAluInstruction& out_instr) const;
|
||||
static void ParseAluInstructionOperand(const ucode::AluInstruction& op,
|
||||
uint32_t i,
|
||||
uint32_t swizzle_component_count,
|
||||
InstructionOperand& out_op);
|
||||
static void ParseAluInstructionOperandSpecial(
|
||||
const ucode::AluInstruction& op, InstructionStorageSource storage_source,
|
||||
uint32_t reg, bool negate, int const_slot, uint32_t component_index,
|
||||
InstructionOperand& out_op);
|
||||
|
||||
// Input shader metadata and microcode.
|
||||
xenos::ShaderType shader_type_;
|
||||
const uint32_t* ucode_dwords_;
|
||||
size_t ucode_dword_count_;
|
||||
uint32_t register_count_;
|
||||
|
||||
// Current host-side modification being generated.
|
||||
uint32_t modification_ = 0;
|
||||
// Current shader and modification being translated.
|
||||
Shader::Translation* translation_ = nullptr;
|
||||
|
||||
// Accumulated translation errors.
|
||||
std::vector<Shader::Error> errors_;
|
||||
|
||||
// Temporary register count, accessible via static and dynamic addressing.
|
||||
uint32_t register_count_ = 0;
|
||||
|
||||
// Current control flow dword index.
|
||||
uint32_t cf_index_ = 0;
|
||||
|
||||
// Microcode disassembly buffer, accumulated throughout the translation.
|
||||
StringBuffer ucode_disasm_buffer_;
|
||||
// Current line number in the disasm, which can be used for source annotation.
|
||||
size_t ucode_disasm_line_number_ = 0;
|
||||
// Last offset used when scanning for line numbers.
|
||||
size_t previous_ucode_disasm_scan_offset_ = 0;
|
||||
|
||||
// Kept for supporting vfetch_mini.
|
||||
ucode::VertexFetchInstruction previous_vfetch_full_;
|
||||
|
||||
// Labels that jumps (explicit or from loops) can be done to, gathered before
|
||||
// translation.
|
||||
std::set<uint32_t> label_addresses_;
|
||||
|
||||
// Detected binding information gathered before translation. Must not be
|
||||
// affected by the modification index.
|
||||
int total_attrib_count_ = 0;
|
||||
std::vector<Shader::VertexBinding> vertex_bindings_;
|
||||
std::vector<Shader::TextureBinding> texture_bindings_;
|
||||
uint32_t unique_vertex_bindings_ = 0;
|
||||
uint32_t unique_texture_bindings_ = 0;
|
||||
|
||||
// These all are gathered before translation.
|
||||
// uses_register_dynamic_addressing_ for writes, writes_color_targets_,
|
||||
// writes_depth_ don't include empty used write masks.
|
||||
// Must not be affected by the modification index.
|
||||
Shader::ConstantRegisterMap constant_register_map_ = {0};
|
||||
bool uses_register_dynamic_addressing_ = false;
|
||||
bool writes_color_targets_[4] = {false, false, false, false};
|
||||
bool writes_depth_ = false;
|
||||
bool kills_pixels_ = false;
|
||||
|
||||
// Memexport info is gathered before translation.
|
||||
// Must not be affected by the modification index.
|
||||
uint32_t memexport_alloc_count_ = 0;
|
||||
// For register allocation in implementations - what was used after each
|
||||
// `alloc export`.
|
||||
uint32_t memexport_eA_written_ = 0;
|
||||
uint8_t memexport_eM_written_[kMaxMemExports] = {0};
|
||||
std::set<uint32_t> memexport_stream_constants_;
|
||||
|
||||
static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];
|
||||
static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40];
|
||||
};
|
||||
|
||||
class UcodeShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
UcodeShaderTranslator() = default;
|
||||
|
||||
protected:
|
||||
std::vector<uint8_t> CompleteTranslation() override;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -203,7 +203,9 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant,
|
||||
push_constants_type, "push_consts");
|
||||
|
||||
if (!texture_bindings().empty()) {
|
||||
const std::vector<Shader::TextureBinding>& texture_bindings =
|
||||
current_shader().texture_bindings();
|
||||
if (!texture_bindings.empty()) {
|
||||
image_2d_type_ =
|
||||
b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1,
|
||||
spv::ImageFormat::ImageFormatUnknown);
|
||||
|
@ -220,7 +222,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
b.makeSampledImageType(image_cube_type_)};
|
||||
|
||||
uint32_t num_tex_bindings = 0;
|
||||
for (const auto& binding : texture_bindings()) {
|
||||
for (const auto& binding : texture_bindings) {
|
||||
// Calculate the highest binding index.
|
||||
num_tex_bindings =
|
||||
std::max(num_tex_bindings, uint32_t(binding.binding_index + 1));
|
||||
|
@ -241,7 +243,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
}
|
||||
|
||||
// Set up the map from binding -> ssbo index
|
||||
for (const auto& binding : texture_bindings()) {
|
||||
for (const auto& binding : texture_bindings) {
|
||||
tex_binding_map_[binding.fetch_constant] =
|
||||
uint32_t(binding.binding_index);
|
||||
}
|
||||
|
@ -254,7 +256,9 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
// Vertex inputs/outputs
|
||||
// Inputs: 32 SSBOs on DS 2 binding 0
|
||||
|
||||
if (!vertex_bindings().empty()) {
|
||||
const std::vector<Shader::VertexBinding>& vertex_bindings =
|
||||
current_shader().vertex_bindings();
|
||||
if (!vertex_bindings.empty()) {
|
||||
// Runtime array for vertex data
|
||||
Id vtx_t = b.makeRuntimeArray(uint_type_);
|
||||
b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride,
|
||||
|
@ -269,7 +273,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
|
||||
// Create the vertex bindings variable.
|
||||
Id vtx_a_t = b.makeArrayType(
|
||||
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings().size())), 0);
|
||||
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings.size())), 0);
|
||||
vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t,
|
||||
"vertex_bindings");
|
||||
|
||||
|
@ -279,7 +283,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
b.addDecoration(vtx_, spv::Decoration::DecorationNonWritable);
|
||||
|
||||
// Set up the map from binding -> ssbo index
|
||||
for (const auto& binding : vertex_bindings()) {
|
||||
for (const auto& binding : vertex_bindings) {
|
||||
vtx_binding_map_[binding.fetch_constant] = binding.binding_index;
|
||||
}
|
||||
}
|
||||
|
@ -494,7 +498,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
|||
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
|
||||
|
||||
// If we write a new depth value, we must declare this mode!
|
||||
if (writes_depth()) {
|
||||
if (current_shader().writes_depth()) {
|
||||
b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing);
|
||||
}
|
||||
|
||||
|
@ -667,8 +671,12 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
|||
return spirv_bytes;
|
||||
}
|
||||
|
||||
void SpirvShaderTranslator::PostTranslation(
|
||||
Shader::Translation& translation, bool setup_shader_post_translation_info) {
|
||||
void SpirvShaderTranslator::PostTranslation() {
|
||||
Shader::Translation& translation = current_translation();
|
||||
if (!translation.is_valid()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validation.
|
||||
if (cvars::spv_validate) {
|
||||
auto validation = validator_.Validate(
|
||||
|
|
|
@ -58,11 +58,23 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
SpirvShaderTranslator();
|
||||
~SpirvShaderTranslator() override;
|
||||
|
||||
// Not storing anything else in modifications (as this shader translator is
|
||||
// being replaced anyway).
|
||||
uint64_t GetDefaultModification(
    xenos::ShaderType shader_type,
    uint32_t dynamic_addressable_register_count,
    Shader::HostVertexShaderType host_vertex_shader_type =
        Shader::HostVertexShaderType::kVertex) const override {
  // The dynamically addressable register count is the whole modification
  // value here; shader_type and host_vertex_shader_type are not used.
  const uint64_t modification = dynamic_addressable_register_count;
  return modification;
}
|
||||
|
||||
protected:
|
||||
virtual uint32_t GetModificationRegisterCount() const {
|
||||
return uint32_t(current_translation().modification());
|
||||
}
|
||||
void StartTranslation() override;
|
||||
std::vector<uint8_t> CompleteTranslation() override;
|
||||
void PostTranslation(Shader::Translation& translation,
|
||||
bool setup_shader_post_translation_info) override;
|
||||
void PostTranslation() override;
|
||||
|
||||
void PreProcessControlFlowInstructions(
|
||||
std::vector<ucode::ControlFlowInstruction> instrs) override;
|
||||
|
|
|
@ -431,15 +431,14 @@ XEPACKEDUNION(ControlFlowInstruction, {
|
|||
static_assert_size(ControlFlowInstruction, 8);
|
||||
|
||||
inline void UnpackControlFlowInstructions(const uint32_t* dwords,
|
||||
ControlFlowInstruction* out_a,
|
||||
ControlFlowInstruction* out_b) {
|
||||
ControlFlowInstruction* out_ab) {
|
||||
uint32_t dword_0 = dwords[0];
|
||||
uint32_t dword_1 = dwords[1];
|
||||
uint32_t dword_2 = dwords[2];
|
||||
out_a->dword_0 = dword_0;
|
||||
out_a->dword_1 = dword_1 & 0xFFFF;
|
||||
out_b->dword_0 = (dword_1 >> 16) | (dword_2 << 16);
|
||||
out_b->dword_1 = dword_2 >> 16;
|
||||
out_ab[0].dword_0 = dword_0;
|
||||
out_ab[0].dword_1 = dword_1 & 0xFFFF;
|
||||
out_ab[1].dword_0 = (dword_1 >> 16) | (dword_2 << 16);
|
||||
out_ab[1].dword_1 = dword_2 >> 16;
|
||||
}
|
||||
|
||||
enum class FetchOpcode : uint32_t {
|
||||
|
|
|
@ -364,10 +364,11 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
|
|||
}
|
||||
|
||||
bool PipelineCache::TranslateShader(
|
||||
VulkanShader::VulkanTranslation& translation, reg::SQ_PROGRAM_CNTL cntl) {
|
||||
VulkanShader::VulkanTranslation& translation) {
|
||||
translation.shader().AnalyzeUcode(ucode_disasm_buffer_);
|
||||
// Perform translation.
|
||||
// If this fails the shader will be marked as invalid and ignored later.
|
||||
if (!shader_translator_->Translate(translation, cntl)) {
|
||||
if (!shader_translator_->TranslateAnalyzedShader(translation)) {
|
||||
XELOGE("Shader translation failed; marking shader as ignored");
|
||||
return false;
|
||||
}
|
||||
|
@ -1071,9 +1072,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
static_cast<VulkanShader::VulkanTranslation*>(
|
||||
vertex_shader->GetOrCreateTranslation(
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kVertex)));
|
||||
xenos::ShaderType::kVertex,
|
||||
vertex_shader->GetDynamicAddressableRegisterCount(
|
||||
regs.sq_program_cntl.vs_num_reg))));
|
||||
if (!vertex_shader_translation->is_translated() &&
|
||||
!TranslateShader(*vertex_shader_translation, regs.sq_program_cntl)) {
|
||||
!TranslateShader(*vertex_shader_translation)) {
|
||||
XELOGE("Failed to translate the vertex shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
@ -1083,9 +1086,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
pixel_shader_translation = static_cast<VulkanShader::VulkanTranslation*>(
|
||||
pixel_shader->GetOrCreateTranslation(
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kPixel)));
|
||||
xenos::ShaderType::kPixel,
|
||||
pixel_shader->GetDynamicAddressableRegisterCount(
|
||||
regs.sq_program_cntl.ps_num_reg))));
|
||||
if (!pixel_shader_translation->is_translated() &&
|
||||
!TranslateShader(*pixel_shader_translation, regs.sq_program_cntl)) {
|
||||
!TranslateShader(*pixel_shader_translation)) {
|
||||
XELOGE("Failed to translate the pixel shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
@ -78,8 +79,7 @@ class PipelineCache {
|
|||
// state.
|
||||
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
|
||||
|
||||
bool TranslateShader(VulkanShader::VulkanTranslation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl);
|
||||
bool TranslateShader(VulkanShader::VulkanTranslation& translation);
|
||||
|
||||
void DumpShaderDisasmAMD(VkPipeline pipeline);
|
||||
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
|
||||
|
@ -92,6 +92,8 @@ class PipelineCache {
|
|||
RegisterFile* register_file_ = nullptr;
|
||||
ui::vulkan::VulkanDevice* device_ = nullptr;
|
||||
|
||||
// Temporary storage for AnalyzeUcode calls.
|
||||
StringBuffer ucode_disasm_buffer_;
|
||||
// Reusable shader translator.
|
||||
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
|
||||
// Disassembler used to get the SPIRV disasm. Only used in debug.
|
||||
|
|
|
@ -73,7 +73,7 @@ bool VulkanShader::VulkanTranslation::Prepare() {
|
|||
}
|
||||
|
||||
Shader::Translation* VulkanShader::CreateTranslationInstance(
|
||||
uint32_t modification) {
|
||||
uint64_t modification) {
|
||||
return new VulkanTranslation(*this, modification);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ class VulkanShader : public Shader {
|
|||
public:
|
||||
class VulkanTranslation : public Translation {
|
||||
public:
|
||||
VulkanTranslation(VulkanShader& shader, uint32_t modification)
|
||||
VulkanTranslation(VulkanShader& shader, uint64_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
~VulkanTranslation() override;
|
||||
|
||||
|
@ -41,7 +41,7 @@ class VulkanShader : public Shader {
|
|||
uint32_t dword_count);
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
Translation* CreateTranslationInstance(uint64_t modification) override;
|
||||
|
||||
private:
|
||||
ui::vulkan::VulkanDevice* device_ = nullptr;
|
||||
|
|
|
@ -546,33 +546,6 @@ inline int GetVertexFormatComponentCount(VertexFormat format) {
|
|||
}
|
||||
}
|
||||
|
||||
inline int GetVertexFormatSizeInWords(VertexFormat format) {
|
||||
switch (format) {
|
||||
case VertexFormat::k_8_8_8_8:
|
||||
case VertexFormat::k_2_10_10_10:
|
||||
case VertexFormat::k_10_11_11:
|
||||
case VertexFormat::k_11_11_10:
|
||||
case VertexFormat::k_16_16:
|
||||
case VertexFormat::k_16_16_FLOAT:
|
||||
case VertexFormat::k_32:
|
||||
case VertexFormat::k_32_FLOAT:
|
||||
return 1;
|
||||
case VertexFormat::k_16_16_16_16:
|
||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||
case VertexFormat::k_32_32:
|
||||
case VertexFormat::k_32_32_FLOAT:
|
||||
return 2;
|
||||
case VertexFormat::k_32_32_32_FLOAT:
|
||||
return 3;
|
||||
case VertexFormat::k_32_32_32_32:
|
||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||
return 4;
|
||||
default:
|
||||
assert_unhandled_case(format);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t GetVertexFormatNeededWords(VertexFormat format,
|
||||
uint32_t used_components) {
|
||||
assert_zero(used_components & ~uint32_t(0b1111));
|
||||
|
|
Loading…
Reference in New Issue