Merge branch 'master' into vulkan
This commit is contained in:
commit
775b4623dc
|
@ -777,12 +777,12 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
uint32_t resolution_scale_x =
|
||||
texture_cache_ ? texture_cache_->GetDrawResolutionScaleX() : 1;
|
||||
uint32_t resolution_scale_y =
|
||||
texture_cache_ ? texture_cache_->GetDrawResolutionScaleY() : 1;
|
||||
if (resolution_scale_x > 1 || resolution_scale_y > 1) {
|
||||
title << ' ' << resolution_scale_x << 'x' << resolution_scale_y;
|
||||
uint32_t draw_resolution_scale_x =
|
||||
texture_cache_ ? texture_cache_->draw_resolution_scale_x() : 1;
|
||||
uint32_t draw_resolution_scale_y =
|
||||
texture_cache_ ? texture_cache_->draw_resolution_scale_y() : 1;
|
||||
if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
|
||||
title << ' ' << draw_resolution_scale_x << 'x' << draw_resolution_scale_y;
|
||||
}
|
||||
}
|
||||
return title.str();
|
||||
|
@ -845,11 +845,28 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
cvars::d3d12_bindless &&
|
||||
provider.GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2;
|
||||
|
||||
// Get the draw resolution scale for the render target cache and the texture
|
||||
// cache.
|
||||
uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
|
||||
bool draw_resolution_scale_not_clamped =
|
||||
TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
|
||||
draw_resolution_scale_y);
|
||||
if (!D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported(
|
||||
draw_resolution_scale_x, draw_resolution_scale_y, provider)) {
|
||||
draw_resolution_scale_not_clamped = false;
|
||||
}
|
||||
if (!draw_resolution_scale_not_clamped) {
|
||||
XELOGW(
|
||||
"The requested draw resolution scale is not supported by the device or "
|
||||
"the emulator, reducing to {}x{}",
|
||||
draw_resolution_scale_x, draw_resolution_scale_y);
|
||||
}
|
||||
|
||||
// Initialize the render target cache before configuring binding - need to
|
||||
// know if using rasterizer-ordered views for the bindless root signature.
|
||||
render_target_cache_ = std::make_unique<D3D12RenderTargetCache>(
|
||||
*register_file_, *memory_, trace_writer_, *this,
|
||||
bindless_resources_used_);
|
||||
*register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
|
||||
draw_resolution_scale_y, *this, bindless_resources_used_);
|
||||
if (!render_target_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the render target cache");
|
||||
return false;
|
||||
|
@ -1141,11 +1158,10 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
texture_cache_ = std::make_unique<TextureCache>(
|
||||
*this, *register_file_, *shared_memory_, bindless_resources_used_,
|
||||
render_target_cache_->GetResolutionScaleX(),
|
||||
render_target_cache_->GetResolutionScaleY());
|
||||
if (!texture_cache_->Initialize()) {
|
||||
texture_cache_ = D3D12TextureCache::Create(
|
||||
*register_file_, *shared_memory_, draw_resolution_scale_x,
|
||||
draw_resolution_scale_y, *this, bindless_resources_used_);
|
||||
if (!texture_cache_) {
|
||||
XELOGE("Failed to initialize the texture cache");
|
||||
return false;
|
||||
}
|
||||
|
@ -1741,12 +1757,12 @@ void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
|
|||
}
|
||||
D3D12_RESOURCE_DESC swap_texture_desc = swap_texture_resource->GetDesc();
|
||||
|
||||
uint32_t resolution_scale_max =
|
||||
std::max(texture_cache_->GetDrawResolutionScaleX(),
|
||||
texture_cache_->GetDrawResolutionScaleY());
|
||||
uint32_t draw_resolution_scale_max =
|
||||
std::max(texture_cache_->draw_resolution_scale_x(),
|
||||
texture_cache_->draw_resolution_scale_y());
|
||||
presenter->RefreshGuestOutput(
|
||||
uint32_t(swap_texture_desc.Width), uint32_t(swap_texture_desc.Height),
|
||||
1280 * resolution_scale_max, 720 * resolution_scale_max,
|
||||
1280 * draw_resolution_scale_max, 720 * draw_resolution_scale_max,
|
||||
[this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource,
|
||||
&swap_texture_desc](
|
||||
ui::Presenter::GuestOutputRefreshContext& context) -> bool {
|
||||
|
@ -2233,13 +2249,13 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
|
||||
// Get dynamic rasterizer state.
|
||||
uint32_t resolution_scale_x = texture_cache_->GetDrawResolutionScaleX();
|
||||
uint32_t resolution_scale_y = texture_cache_->GetDrawResolutionScaleY();
|
||||
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
|
||||
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
|
||||
RenderTargetCache::DepthFloat24Conversion depth_float24_conversion =
|
||||
render_target_cache_->depth_float24_conversion();
|
||||
draw_util::ViewportInfo viewport_info;
|
||||
draw_util::GetHostViewportInfo(
|
||||
regs, resolution_scale_x, resolution_scale_y, true,
|
||||
regs, draw_resolution_scale_x, draw_resolution_scale_y, true,
|
||||
D3D12_VIEWPORT_BOUNDS_MAX, D3D12_VIEWPORT_BOUNDS_MAX, false,
|
||||
normalized_depth_control,
|
||||
host_render_targets_used &&
|
||||
|
@ -2251,10 +2267,10 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
viewport_info);
|
||||
draw_util::Scissor scissor;
|
||||
draw_util::GetScissor(regs, scissor);
|
||||
scissor.offset[0] *= resolution_scale_x;
|
||||
scissor.offset[1] *= resolution_scale_y;
|
||||
scissor.extent[0] *= resolution_scale_x;
|
||||
scissor.extent[1] *= resolution_scale_y;
|
||||
scissor.offset[0] *= draw_resolution_scale_x;
|
||||
scissor.offset[1] *= draw_resolution_scale_y;
|
||||
scissor.extent[0] *= draw_resolution_scale_x;
|
||||
scissor.extent[1] *= draw_resolution_scale_y;
|
||||
|
||||
// Update viewport, scissor, blend factor and stencil reference.
|
||||
UpdateFixedFunctionState(viewport_info, scissor, primitive_polygonal,
|
||||
|
@ -2774,6 +2790,8 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
|
|||
primitive_processor_->CompletedSubmissionUpdated();
|
||||
|
||||
render_target_cache_->CompletedSubmissionUpdated();
|
||||
|
||||
texture_cache_->CompletedSubmissionUpdated(submission_completed_);
|
||||
}
|
||||
|
||||
bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
||||
|
@ -2856,7 +2874,7 @@ bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
|||
|
||||
render_target_cache_->BeginSubmission();
|
||||
|
||||
texture_cache_->BeginSubmission();
|
||||
texture_cache_->BeginSubmission(submission_current_);
|
||||
}
|
||||
|
||||
if (is_opening_frame) {
|
||||
|
@ -3166,8 +3184,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
|
||||
bool edram_rov_used = render_target_cache_->GetPath() ==
|
||||
RenderTargetCache::Path::kPixelShaderInterlock;
|
||||
uint32_t resolution_scale_x = texture_cache_->GetDrawResolutionScaleX();
|
||||
uint32_t resolution_scale_y = texture_cache_->GetDrawResolutionScaleY();
|
||||
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
|
||||
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
|
||||
|
||||
// Get the color info register values for each render target. Also, for ROV,
|
||||
// exclude components that don't exist in the format from the write mask.
|
||||
|
@ -3381,10 +3399,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// radius conversion to avoid multiplying the per-vertex diameter by an
|
||||
// additional constant in the shader.
|
||||
float point_screen_diameter_to_ndc_radius_x =
|
||||
(/* 0.5f * 2.0f * */ float(resolution_scale_x)) /
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
|
||||
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
||||
float point_screen_diameter_to_ndc_radius_y =
|
||||
(/* 0.5f * 2.0f * */ float(resolution_scale_y)) /
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
|
||||
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
|
@ -3457,9 +3475,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.alpha_to_mask != alpha_to_mask;
|
||||
system_constants_.alpha_to_mask = alpha_to_mask;
|
||||
|
||||
uint32_t edram_tile_dwords_scaled = xenos::kEdramTileWidthSamples *
|
||||
xenos::kEdramTileHeightSamples *
|
||||
(resolution_scale_x * resolution_scale_y);
|
||||
uint32_t edram_tile_dwords_scaled =
|
||||
xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
|
||||
(draw_resolution_scale_x * draw_resolution_scale_y);
|
||||
|
||||
// EDRAM pitch for ROV writing.
|
||||
if (edram_rov_used) {
|
||||
|
@ -3571,7 +3589,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// background is more likely.
|
||||
float poly_offset_scale_factor =
|
||||
xenos::kPolygonOffsetScaleSubpixelUnit *
|
||||
std::max(resolution_scale_x, resolution_scale_y);
|
||||
std::max(draw_resolution_scale_x, draw_resolution_scale_y);
|
||||
poly_offset_front_scale *= poly_offset_scale_factor;
|
||||
poly_offset_back_scale *= poly_offset_scale_factor;
|
||||
dirty |= system_constants_.edram_poly_offset_front_scale !=
|
||||
|
@ -3879,7 +3897,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
current_samplers_vertex_.resize(
|
||||
std::max(current_samplers_vertex_.size(), sampler_count_vertex));
|
||||
for (size_t i = 0; i < sampler_count_vertex; ++i) {
|
||||
TextureCache::SamplerParameters parameters =
|
||||
D3D12TextureCache::SamplerParameters parameters =
|
||||
texture_cache_->GetSamplerParameters(samplers_vertex[i]);
|
||||
if (current_samplers_vertex_[i] != parameters) {
|
||||
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
||||
|
@ -3911,7 +3929,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
current_samplers_pixel_.resize(std::max(current_samplers_pixel_.size(),
|
||||
size_t(sampler_count_pixel)));
|
||||
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
||||
TextureCache::SamplerParameters parameters =
|
||||
D3D12TextureCache::SamplerParameters parameters =
|
||||
texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
|
||||
if (current_samplers_pixel_[i] != parameters) {
|
||||
current_samplers_pixel_[i] = parameters;
|
||||
|
@ -4018,7 +4036,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
std::max(current_sampler_bindless_indices_vertex_.size(),
|
||||
size_t(sampler_count_vertex)));
|
||||
for (uint32_t j = 0; j < sampler_count_vertex; ++j) {
|
||||
TextureCache::SamplerParameters sampler_parameters =
|
||||
D3D12TextureCache::SamplerParameters sampler_parameters =
|
||||
current_samplers_vertex_[j];
|
||||
uint32_t sampler_index;
|
||||
auto it = texture_cache_bindless_sampler_map_.find(
|
||||
|
@ -4050,7 +4068,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
std::max(current_sampler_bindless_indices_pixel_.size(),
|
||||
size_t(sampler_count_pixel)));
|
||||
for (uint32_t j = 0; j < sampler_count_pixel; ++j) {
|
||||
TextureCache::SamplerParameters sampler_parameters =
|
||||
D3D12TextureCache::SamplerParameters sampler_parameters =
|
||||
current_samplers_pixel_[j];
|
||||
uint32_t sampler_index;
|
||||
auto it = texture_cache_bindless_sampler_map_.find(
|
||||
|
|
|
@ -24,9 +24,9 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_primitive_processor.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_render_target_cache.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_texture_cache.h"
|
||||
#include "xenia/gpu/d3d12/deferred_command_list.h"
|
||||
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
|
@ -482,7 +482,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// number (so checking if the first can be reused is enough).
|
||||
std::deque<std::pair<ID3D12DescriptorHeap*, uint64_t>>
|
||||
sampler_bindless_heaps_overflowed_;
|
||||
// TextureCache::SamplerParameters::value -> indices within the current
|
||||
// D3D12TextureCache::SamplerParameters::value -> indices within the current
|
||||
// bindless sampler heap.
|
||||
std::unordered_map<uint32_t, uint32_t> texture_cache_bindless_sampler_map_;
|
||||
|
||||
|
@ -497,7 +497,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
std::unique_ptr<PipelineCache> pipeline_cache_;
|
||||
|
||||
std::unique_ptr<TextureCache> texture_cache_;
|
||||
std::unique_ptr<D3D12TextureCache> texture_cache_;
|
||||
|
||||
// Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read
|
||||
// as R10G10B10X2 with swizzle).
|
||||
|
@ -648,10 +648,11 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// Size of these should be ignored when checking whether these are up to date,
|
||||
// layout UID should be checked first (they will be different for different
|
||||
// binding counts).
|
||||
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_vertex_;
|
||||
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_pixel_;
|
||||
std::vector<TextureCache::SamplerParameters> current_samplers_vertex_;
|
||||
std::vector<TextureCache::SamplerParameters> current_samplers_pixel_;
|
||||
std::vector<D3D12TextureCache::TextureSRVKey>
|
||||
current_texture_srv_keys_vertex_;
|
||||
std::vector<D3D12TextureCache::TextureSRVKey> current_texture_srv_keys_pixel_;
|
||||
std::vector<D3D12TextureCache::SamplerParameters> current_samplers_vertex_;
|
||||
std::vector<D3D12TextureCache::SamplerParameters> current_samplers_pixel_;
|
||||
std::vector<uint32_t> current_sampler_bindless_indices_vertex_;
|
||||
std::vector<uint32_t> current_sampler_bindless_indices_pixel_;
|
||||
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_texture_cache.h"
|
||||
#include "xenia/gpu/d3d12/deferred_command_list.h"
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/dxbc.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
|
@ -250,35 +250,10 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
path_ = Path::kHostRenderTargets;
|
||||
}
|
||||
|
||||
uint32_t config_resolution_scale_x =
|
||||
uint32_t(std::max(cvars::draw_resolution_scale_x, int32_t(1)));
|
||||
uint32_t config_resolution_scale_y =
|
||||
uint32_t(std::max(cvars::draw_resolution_scale_y, int32_t(1)));
|
||||
// Hard limit, originating from the half-pixel offset (two-pixel offset is too
|
||||
// much, the resolve shaders, being generic for different scales, only
|
||||
// duplicate the second pixel into the first, not the third), and also due to
|
||||
// the bit counts used for passing the scale to shaders, and hardcoded scales
|
||||
// and shifts for fast division by integer constants.
|
||||
const uint32_t kMaxResolutionScale = 3;
|
||||
resolution_scale_x_ =
|
||||
std::min(config_resolution_scale_x, kMaxResolutionScale);
|
||||
resolution_scale_y_ =
|
||||
std::min(config_resolution_scale_y, kMaxResolutionScale);
|
||||
TextureCache::ClampDrawResolutionScaleToSupportedRange(
|
||||
resolution_scale_x_, resolution_scale_y_, provider);
|
||||
if (resolution_scale_x_ != config_resolution_scale_x ||
|
||||
resolution_scale_y_ != config_resolution_scale_y) {
|
||||
XELOGW(
|
||||
"D3D12RenderTargetCache: {}x{} resolution scale not supported by the "
|
||||
"device or the emulator, reducing to {}x{}",
|
||||
config_resolution_scale_x, config_resolution_scale_y,
|
||||
resolution_scale_x_, resolution_scale_y_);
|
||||
}
|
||||
bool resolution_scaled = resolution_scale_x_ > 1 || resolution_scale_y_ > 1;
|
||||
|
||||
// Create the buffer for reinterpreting EDRAM contents.
|
||||
uint32_t edram_buffer_size =
|
||||
xenos::kEdramSizeBytes * resolution_scale_x_ * resolution_scale_y_;
|
||||
xenos::kEdramSizeBytes *
|
||||
(draw_resolution_scale_x() * draw_resolution_scale_y());
|
||||
D3D12_RESOURCE_DESC edram_buffer_desc;
|
||||
ui::d3d12::util::FillBufferResourceDesc(
|
||||
edram_buffer_desc, edram_buffer_size,
|
||||
|
@ -369,6 +344,8 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
uint32_t(EdramBufferDescriptorIndex::kR32G32B32A32UintUAV)),
|
||||
edram_buffer_, DXGI_FORMAT_R32G32B32A32_UINT, edram_buffer_size >> 4);
|
||||
|
||||
bool draw_resolution_scaled = IsDrawResolutionScaled();
|
||||
|
||||
// Create the resolve copying root signature.
|
||||
D3D12_ROOT_PARAMETER resolve_copy_root_parameters[4];
|
||||
// Parameter 0 is constants.
|
||||
|
@ -379,7 +356,7 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
// Binding all of the shared memory at 1x resolution, portions with scaled
|
||||
// resolution.
|
||||
resolve_copy_root_parameters[0].Constants.Num32BitValues =
|
||||
(IsResolutionScaled()
|
||||
(draw_resolution_scaled
|
||||
? sizeof(draw_util::ResolveCopyShaderConstants::DestRelative)
|
||||
: sizeof(draw_util::ResolveCopyShaderConstants)) /
|
||||
sizeof(uint32_t);
|
||||
|
@ -414,7 +391,7 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
resolve_copy_root_parameters[2].ShaderVisibility =
|
||||
D3D12_SHADER_VISIBILITY_ALL;
|
||||
// Parameter 3 is the resolution scale.
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
resolve_copy_root_parameters[3].ParameterType =
|
||||
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
resolve_copy_root_parameters[3].Constants.ShaderRegister = 1;
|
||||
|
@ -427,7 +404,8 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
D3D12_SHADER_VISIBILITY_ALL;
|
||||
}
|
||||
D3D12_ROOT_SIGNATURE_DESC resolve_copy_root_signature_desc;
|
||||
resolve_copy_root_signature_desc.NumParameters = resolution_scaled ? 4 : 3;
|
||||
resolve_copy_root_signature_desc.NumParameters =
|
||||
draw_resolution_scaled ? 4 : 3;
|
||||
resolve_copy_root_signature_desc.pParameters = resolve_copy_root_parameters;
|
||||
resolve_copy_root_signature_desc.NumStaticSamplers = 0;
|
||||
resolve_copy_root_signature_desc.pStaticSamplers = nullptr;
|
||||
|
@ -457,9 +435,9 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
ID3D12PipelineState* resolve_copy_pipeline =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scaled ? resolve_copy_shader_code.scaled
|
||||
draw_resolution_scaled ? resolve_copy_shader_code.scaled
|
||||
: resolve_copy_shader_code.unscaled,
|
||||
resolution_scaled ? resolve_copy_shader_code.scaled_size
|
||||
draw_resolution_scaled ? resolve_copy_shader_code.scaled_size
|
||||
: resolve_copy_shader_code.unscaled_size,
|
||||
resolve_copy_root_signature_);
|
||||
if (resolve_copy_pipeline == nullptr) {
|
||||
|
@ -1081,7 +1059,7 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
resolve_rov_clear_root_parameters[1].ShaderVisibility =
|
||||
D3D12_SHADER_VISIBILITY_ALL;
|
||||
// Parameter 2 is the resolution scale.
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
resolve_rov_clear_root_parameters[2].ParameterType =
|
||||
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
resolve_rov_clear_root_parameters[2].Constants.ShaderRegister = 1;
|
||||
|
@ -1095,7 +1073,7 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
}
|
||||
D3D12_ROOT_SIGNATURE_DESC resolve_rov_clear_root_signature_desc;
|
||||
resolve_rov_clear_root_signature_desc.NumParameters =
|
||||
resolution_scaled ? 3 : 2;
|
||||
draw_resolution_scaled ? 3 : 2;
|
||||
resolve_rov_clear_root_signature_desc.pParameters =
|
||||
resolve_rov_clear_root_parameters;
|
||||
resolve_rov_clear_root_signature_desc.NumStaticSamplers = 0;
|
||||
|
@ -1115,9 +1093,9 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
// Create the resolve EDRAM buffer clearing pipelines.
|
||||
resolve_rov_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scaled ? shaders::resolve_clear_32bpp_scaled_cs
|
||||
draw_resolution_scaled ? shaders::resolve_clear_32bpp_scaled_cs
|
||||
: shaders::resolve_clear_32bpp_cs,
|
||||
resolution_scaled ? sizeof(shaders::resolve_clear_32bpp_scaled_cs)
|
||||
draw_resolution_scaled ? sizeof(shaders::resolve_clear_32bpp_scaled_cs)
|
||||
: sizeof(shaders::resolve_clear_32bpp_cs),
|
||||
resolve_rov_clear_root_signature_);
|
||||
if (resolve_rov_clear_32bpp_pipeline_ == nullptr) {
|
||||
|
@ -1130,9 +1108,9 @@ bool D3D12RenderTargetCache::Initialize() {
|
|||
resolve_rov_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
|
||||
resolve_rov_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scaled ? shaders::resolve_clear_64bpp_scaled_cs
|
||||
draw_resolution_scaled ? shaders::resolve_clear_64bpp_scaled_cs
|
||||
: shaders::resolve_clear_64bpp_cs,
|
||||
resolution_scaled ? sizeof(shaders::resolve_clear_64bpp_scaled_cs)
|
||||
draw_resolution_scaled ? sizeof(shaders::resolve_clear_64bpp_scaled_cs)
|
||||
: sizeof(shaders::resolve_clear_64bpp_cs),
|
||||
resolve_rov_clear_root_signature_);
|
||||
if (resolve_rov_clear_64bpp_pipeline_ == nullptr) {
|
||||
|
@ -1366,17 +1344,17 @@ void D3D12RenderTargetCache::WriteEdramUintPow2UAVDescriptor(
|
|||
|
||||
bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
||||
D3D12SharedMemory& shared_memory,
|
||||
TextureCache& texture_cache,
|
||||
D3D12TextureCache& texture_cache,
|
||||
uint32_t& written_address_out,
|
||||
uint32_t& written_length_out) {
|
||||
written_address_out = 0;
|
||||
written_length_out = 0;
|
||||
|
||||
bool resolution_scaled = IsResolutionScaled();
|
||||
bool draw_resolution_scaled = IsDrawResolutionScaled();
|
||||
|
||||
draw_util::ResolveInfo resolve_info;
|
||||
if (!draw_util::GetResolveInfo(
|
||||
register_file(), memory, trace_writer_, resolution_scaled,
|
||||
register_file(), memory, trace_writer_, draw_resolution_scaled,
|
||||
IsFixed16TruncatedToMinus1To1(), resolve_info)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1387,8 +1365,8 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
}
|
||||
|
||||
draw_util::ResolveResolutionScaleConstant resolution_scale_constant;
|
||||
resolution_scale_constant.resolution_scale_x = resolution_scale_x_;
|
||||
resolution_scale_constant.resolution_scale_y = resolution_scale_y_;
|
||||
resolution_scale_constant.resolution_scale_x = draw_resolution_scale_x();
|
||||
resolution_scale_constant.resolution_scale_y = draw_resolution_scale_y();
|
||||
|
||||
DeferredCommandList& command_list =
|
||||
command_processor_.GetDeferredCommandList();
|
||||
|
@ -1413,8 +1391,8 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
draw_util::ResolveCopyShaderConstants copy_shader_constants;
|
||||
uint32_t copy_group_count_x, copy_group_count_y;
|
||||
draw_util::ResolveCopyShaderIndex copy_shader = resolve_info.GetCopyShader(
|
||||
resolution_scale_x_, resolution_scale_y_, copy_shader_constants,
|
||||
copy_group_count_x, copy_group_count_y);
|
||||
draw_resolution_scale_x(), draw_resolution_scale_y(),
|
||||
copy_shader_constants, copy_group_count_x, copy_group_count_y);
|
||||
assert_true(copy_group_count_x && copy_group_count_y);
|
||||
if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) {
|
||||
const draw_util::ResolveCopyShaderInfo& copy_shader_info =
|
||||
|
@ -1422,7 +1400,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
|
||||
// Make sure there is memory to write to.
|
||||
bool copy_dest_committed;
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
copy_dest_committed =
|
||||
texture_cache.EnsureScaledResolveMemoryCommitted(
|
||||
resolve_info.copy_dest_base, resolve_info.copy_dest_length) &&
|
||||
|
@ -1441,10 +1419,10 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source;
|
||||
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2];
|
||||
if (command_processor_.RequestOneUseSingleViewDescriptors(
|
||||
bindless_resources_used_ ? uint32_t(resolution_scaled) : 2,
|
||||
bindless_resources_used_ ? uint32_t(draw_resolution_scaled) : 2,
|
||||
descriptors)) {
|
||||
if (bindless_resources_used_) {
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
descriptor_dest = descriptors[0];
|
||||
} else {
|
||||
descriptor_dest =
|
||||
|
@ -1463,7 +1441,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
}
|
||||
} else {
|
||||
descriptor_dest = descriptors[0];
|
||||
if (!resolution_scaled) {
|
||||
if (!draw_resolution_scaled) {
|
||||
shared_memory.WriteUintPow2UAVDescriptor(
|
||||
descriptor_dest.first, copy_shader_info.dest_bpe_log2);
|
||||
}
|
||||
|
@ -1475,7 +1453,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
copy_shader_info.source_bpe_log2);
|
||||
}
|
||||
}
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
texture_cache.CreateCurrentScaledResolveRangeUintPow2UAV(
|
||||
descriptor_dest.first, copy_shader_info.dest_bpe_log2);
|
||||
texture_cache.TransitionCurrentScaledResolveRange(
|
||||
|
@ -1487,7 +1465,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
|
||||
// Submit the resolve.
|
||||
command_list.D3DSetComputeRootSignature(resolve_copy_root_signature_);
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
3, sizeof(resolution_scale_constant) / sizeof(uint32_t),
|
||||
&resolution_scale_constant, 0);
|
||||
|
@ -1496,7 +1474,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
2, descriptor_source.second);
|
||||
command_list.D3DSetComputeRootDescriptorTable(1,
|
||||
descriptor_dest.second);
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
0,
|
||||
sizeof(copy_shader_constants.dest_relative) / sizeof(uint32_t),
|
||||
|
@ -1512,7 +1490,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
|
||||
|
||||
// Order the resolve with other work using the destination as a UAV.
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
texture_cache.MarkCurrentScaledResolveRangeUAVWritesCommitNeeded();
|
||||
} else {
|
||||
shared_memory.MarkUAVWritesCommitNeeded();
|
||||
|
@ -1585,7 +1563,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
CommitEdramBufferUAVWrites();
|
||||
command_list.D3DSetComputeRootSignature(
|
||||
resolve_rov_clear_root_signature_);
|
||||
if (resolution_scaled) {
|
||||
if (draw_resolution_scaled) {
|
||||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
2, sizeof(resolution_scale_constant) / sizeof(uint32_t),
|
||||
&resolution_scale_constant, 0);
|
||||
|
@ -1593,8 +1571,8 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
command_list.D3DSetComputeRootDescriptorTable(
|
||||
1, descriptor_edram.second);
|
||||
std::pair<uint32_t, uint32_t> clear_group_count =
|
||||
resolve_info.GetClearShaderGroupCount(resolution_scale_x_,
|
||||
resolution_scale_y_);
|
||||
resolve_info.GetClearShaderGroupCount(draw_resolution_scale_x(),
|
||||
draw_resolution_scale_y());
|
||||
assert_true(clear_group_count.first && clear_group_count.second);
|
||||
if (clear_depth) {
|
||||
draw_util::ResolveClearShaderConstants depth_clear_constants;
|
||||
|
@ -1648,7 +1626,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
|||
}
|
||||
|
||||
bool D3D12RenderTargetCache::InitializeTraceSubmitDownloads() {
|
||||
if (IsResolutionScaled()) {
|
||||
if (IsDrawResolutionScaled()) {
|
||||
// No 1:1 mapping.
|
||||
return false;
|
||||
}
|
||||
|
@ -1704,7 +1682,7 @@ void D3D12RenderTargetCache::InitializeTraceCompleteDownloads() {
|
|||
}
|
||||
|
||||
void D3D12RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) {
|
||||
if (IsResolutionScaled()) {
|
||||
if (IsDrawResolutionScaled()) {
|
||||
// No 1:1 mapping.
|
||||
return;
|
||||
}
|
||||
|
@ -1962,10 +1940,10 @@ RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget(
|
|||
D3D12_RESOURCE_DESC resource_desc;
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
resource_desc.Alignment = 0;
|
||||
resource_desc.Width = key.GetWidth() * resolution_scale_x_;
|
||||
resource_desc.Width = key.GetWidth() * draw_resolution_scale_x();
|
||||
resource_desc.Height =
|
||||
GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples) *
|
||||
resolution_scale_y_;
|
||||
draw_resolution_scale_y();
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
if (key.is_depth) {
|
||||
|
@ -2963,10 +2941,13 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// for the coordinates for that load. Currently 3 temps are enough.
|
||||
a.OpDclTemps(3);
|
||||
|
||||
uint32_t draw_resolution_scale_x = this->draw_resolution_scale_x();
|
||||
uint32_t draw_resolution_scale_y = this->draw_resolution_scale_y();
|
||||
|
||||
uint32_t tile_width_samples_scaled =
|
||||
xenos::kEdramTileWidthSamples * resolution_scale_x_;
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x;
|
||||
uint32_t tile_height_samples_scaled =
|
||||
xenos::kEdramTileHeightSamples * resolution_scale_y_;
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y;
|
||||
|
||||
// Split the destination pixel index into 32bpp tile in r0.z and
|
||||
// 32bpp-tile-relative pixel index in r0.xy.
|
||||
|
@ -2979,12 +2960,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X);
|
||||
uint32_t dest_tile_width_divide_scale, dest_tile_width_divide_upper_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
resolution_scale_x_, dest_tile_width_divide_scale,
|
||||
draw_resolution_scale_x, dest_tile_width_divide_scale,
|
||||
dest_tile_width_divide_upper_shift);
|
||||
assert_true(dest_tile_width_divide_upper_shift >= dest_sample_width_log2);
|
||||
// Need the host tile size in pixels, not samples.
|
||||
dest_tile_width_divide_upper_shift -= dest_sample_width_log2;
|
||||
if (resolution_scale_y_ == 3) {
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"D3D12RenderTargetCache EDRAM range ownership transfer shader generation "
|
||||
"supports Y draw resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y == 3) {
|
||||
// r0.zw = upper 32 bits in the division process of pixel XY by pixel count
|
||||
// in a 32bpp tile
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b1100), dxbc::Dest::Null(),
|
||||
|
@ -3000,14 +2985,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpIMAd(
|
||||
dxbc::Dest::R(0, 0b0011), dxbc::Src::R(0, 0b1110),
|
||||
dxbc::Src::LI(
|
||||
-int32_t((80 * resolution_scale_x_) >> dest_sample_width_log2),
|
||||
-int32_t((16 * resolution_scale_y_) >> dest_sample_height_log2), 0,
|
||||
0),
|
||||
-int32_t((80 * draw_resolution_scale_x) >> dest_sample_width_log2),
|
||||
-int32_t((16 * draw_resolution_scale_y) >> dest_sample_height_log2),
|
||||
0, 0),
|
||||
dxbc::Src::R(0, 0b0100));
|
||||
} else {
|
||||
assert_true(resolution_scale_y_ <= 2);
|
||||
assert_true(draw_resolution_scale_y <= 2);
|
||||
uint32_t dest_tile_height_pixels_log2 =
|
||||
(resolution_scale_y_ == 2 ? 5 : 4) - dest_sample_height_log2;
|
||||
(draw_resolution_scale_y == 2 ? 5 : 4) - dest_sample_height_log2;
|
||||
// r0.z = upper 32 bits in the division process of pixel X by pixel count in
|
||||
// a 32bpp tile
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b0100), dxbc::Dest::Null(),
|
||||
|
@ -3019,7 +3004,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dest_tile_height_pixels_log2));
|
||||
// r0.x = destination pixel X index within the 32bpp tile
|
||||
a.OpIMAd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LI(-int32_t((80 * resolution_scale_x_) >>
|
||||
dxbc::Src::LI(-int32_t((80 * draw_resolution_scale_x) >>
|
||||
dest_sample_width_log2)),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
// r0.y = destination pixel Y index within the 32bpp tile
|
||||
|
@ -4518,15 +4503,15 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
|
|||
// Assuming the rectangle is already clamped by the setup function from the
|
||||
// common render target cache.
|
||||
clear_rect.left =
|
||||
LONG(resolve_clear_rectangle->x_pixels * resolution_scale_x_);
|
||||
LONG(resolve_clear_rectangle->x_pixels * draw_resolution_scale_x());
|
||||
clear_rect.top =
|
||||
LONG(resolve_clear_rectangle->y_pixels * resolution_scale_y_);
|
||||
LONG(resolve_clear_rectangle->y_pixels * draw_resolution_scale_y());
|
||||
clear_rect.right = LONG((resolve_clear_rectangle->x_pixels +
|
||||
resolve_clear_rectangle->width_pixels) *
|
||||
resolution_scale_x_);
|
||||
draw_resolution_scale_x());
|
||||
clear_rect.bottom = LONG((resolve_clear_rectangle->y_pixels +
|
||||
resolve_clear_rectangle->height_pixels) *
|
||||
resolution_scale_y_);
|
||||
draw_resolution_scale_y());
|
||||
}
|
||||
|
||||
// Do host depth storing for the depth destination (assuming there can be only
|
||||
|
@ -4811,8 +4796,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
|
|||
bool transfer_viewport_set = false;
|
||||
float pixels_to_ndc_unscaled =
|
||||
2.0f / float(D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION);
|
||||
float pixels_to_ndc_x = pixels_to_ndc_unscaled * resolution_scale_x_;
|
||||
float pixels_to_ndc_y = pixels_to_ndc_unscaled * resolution_scale_y_;
|
||||
float pixels_to_ndc_x = pixels_to_ndc_unscaled * draw_resolution_scale_x();
|
||||
float pixels_to_ndc_y = pixels_to_ndc_unscaled * draw_resolution_scale_y();
|
||||
|
||||
TransferRootSignatureIndex last_transfer_root_signature_index =
|
||||
TransferRootSignatureIndex::kCount;
|
||||
|
@ -4988,18 +4973,18 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
|
|||
++j) {
|
||||
const Transfer::Rectangle& stencil_clear_rectangle =
|
||||
transfer_stencil_clear_rectangles[j];
|
||||
stencil_clear_rect_write_ptr->left =
|
||||
LONG(stencil_clear_rectangle.x_pixels * resolution_scale_x_);
|
||||
stencil_clear_rect_write_ptr->top =
|
||||
LONG(stencil_clear_rectangle.y_pixels * resolution_scale_y_);
|
||||
stencil_clear_rect_write_ptr->left = LONG(
|
||||
stencil_clear_rectangle.x_pixels * draw_resolution_scale_x());
|
||||
stencil_clear_rect_write_ptr->top = LONG(
|
||||
stencil_clear_rectangle.y_pixels * draw_resolution_scale_y());
|
||||
stencil_clear_rect_write_ptr->right =
|
||||
LONG((stencil_clear_rectangle.x_pixels +
|
||||
stencil_clear_rectangle.width_pixels) *
|
||||
resolution_scale_x_);
|
||||
draw_resolution_scale_x());
|
||||
stencil_clear_rect_write_ptr->bottom =
|
||||
LONG((stencil_clear_rectangle.y_pixels +
|
||||
stencil_clear_rectangle.height_pixels) *
|
||||
resolution_scale_y_);
|
||||
draw_resolution_scale_y());
|
||||
++stencil_clear_rect_write_ptr;
|
||||
}
|
||||
}
|
||||
|
@ -5967,13 +5952,20 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
// fits in it, while 80x16 doesn't.
|
||||
a.OpDclThreadGroup(40, 16, 1);
|
||||
|
||||
uint32_t draw_resolution_scale_x = this->draw_resolution_scale_x();
|
||||
uint32_t draw_resolution_scale_y = this->draw_resolution_scale_y();
|
||||
|
||||
// For now, as the exact addressing in 64bpp render targets relatively to
|
||||
// 32bpp is unknown, treating 64bpp tiles as storing 40x16 samples rather than
|
||||
// 80x16 for simplicity of addressing into the texture.
|
||||
|
||||
// Get the parts of the address along Y - tile row index within the dispatch
|
||||
// to r0.w, sample Y within the tile to r0.y.
|
||||
if (resolution_scale_y_ == 3) {
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"D3D12RenderTargetCache render target dump shader generation supports Y "
|
||||
"draw resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y == 3) {
|
||||
// Multiplication part of the division by the (16 * scale) tile height,
|
||||
// specifically 48 here, or 16 * 3.
|
||||
// r0.w = (Y * kDivideScale3) >> 32
|
||||
|
@ -5988,28 +5980,28 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
// r0.y = Y sample position within the tile
|
||||
// r0.w = Y tile position
|
||||
a.OpIMAd(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LI(-16 * resolution_scale_y_),
|
||||
dxbc::Src::LI(-16 * draw_resolution_scale_y),
|
||||
dxbc::Src::VThreadID(dxbc::Src::kYYYY));
|
||||
} else {
|
||||
assert_true(resolution_scale_y_ <= 2);
|
||||
assert_true(draw_resolution_scale_y <= 2);
|
||||
// Tile height is a power of two, can use bit operations.
|
||||
// Get the tile row index into r0.w.
|
||||
// r0.w = Y tile position.
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b1000), dxbc::Src::VThreadID(dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU(resolution_scale_y_ == 2 ? 5 : 4));
|
||||
dxbc::Src::LU(draw_resolution_scale_y == 2 ? 5 : 4));
|
||||
// Get the Y sample position within the tile into r0.y.
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.w = Y tile position
|
||||
a.OpAnd(dxbc::Dest::R(0, 0b0010), dxbc::Src::VThreadID(dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU((16 * resolution_scale_y_) - 1));
|
||||
dxbc::Src::LU((16 * draw_resolution_scale_y) - 1));
|
||||
}
|
||||
|
||||
// Get the X tile offset within the dispatch to r0.z.
|
||||
uint32_t tile_width = xenos::kEdramTileWidthSamples * resolution_scale_x_;
|
||||
uint32_t tile_width = xenos::kEdramTileWidthSamples * draw_resolution_scale_x;
|
||||
uint32_t tile_width_divide_scale;
|
||||
uint32_t tile_width_divide_upper_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
resolution_scale_x_, tile_width_divide_scale,
|
||||
draw_resolution_scale_x, tile_width_divide_scale,
|
||||
tile_width_divide_upper_shift);
|
||||
if (format_is_64bpp) {
|
||||
tile_width >>= 1;
|
||||
|
@ -6082,7 +6074,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
// r0.w = tile index in the EDRAM
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b0100), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LU(
|
||||
resolution_scale_x_ * resolution_scale_y_ *
|
||||
draw_resolution_scale_x * draw_resolution_scale_y *
|
||||
(xenos::kEdramTileWidthSamples >> uint32_t(format_is_64bpp)) *
|
||||
xenos::kEdramTileHeightSamples),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
|
@ -6177,8 +6169,9 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
// r0.y = Y sample position within the source texture
|
||||
// r0.z = sample offset in the EDRAM
|
||||
// r1.x = free
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(xenos::kEdramTileHeightSamples * resolution_scale_y_),
|
||||
a.OpUMAd(
|
||||
dxbc::Dest::R(0, 0b0010), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(xenos::kEdramTileHeightSamples * draw_resolution_scale_y),
|
||||
dxbc::Src::R(0, dxbc::Src::kYYYY));
|
||||
// Will be using the source texture coordinates from r0.xy, and for
|
||||
// single-sampled source, LOD from r0.w.
|
||||
|
@ -6708,9 +6701,10 @@ void D3D12RenderTargetCache::DumpRenderTargets(uint32_t dump_base,
|
|||
command_processor_.SubmitBarriers();
|
||||
// Processing 40 x 16 x scale samples per dispatch (a 32bpp tile in two
|
||||
// dispatches at 1x1 scale, 64bpp in one dispatch).
|
||||
command_list.D3DDispatch((dispatch.width_tiles * resolution_scale_x_)
|
||||
command_list.D3DDispatch(
|
||||
(dispatch.width_tiles * draw_resolution_scale_x())
|
||||
<< uint32_t(!format_is_64bpp),
|
||||
dispatch.height_tiles * resolution_scale_y_, 1);
|
||||
dispatch.height_tiles * draw_resolution_scale_y(), 1);
|
||||
}
|
||||
MarkEdramBufferModified();
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_texture_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/render_target_cache.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
|
@ -44,9 +44,12 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
public:
|
||||
D3D12RenderTargetCache(const RegisterFile& register_file,
|
||||
const Memory& memory, TraceWriter& trace_writer,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
D3D12CommandProcessor& command_processor,
|
||||
bool bindless_resources_used)
|
||||
: RenderTargetCache(register_file, memory, &trace_writer),
|
||||
: RenderTargetCache(register_file, memory, &trace_writer,
|
||||
draw_resolution_scale_x, draw_resolution_scale_y),
|
||||
command_processor_(command_processor),
|
||||
trace_writer_(trace_writer),
|
||||
bindless_resources_used_(bindless_resources_used) {}
|
||||
|
@ -60,9 +63,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
|
||||
Path GetPath() const override { return path_; }
|
||||
|
||||
uint32_t GetResolutionScaleX() const override { return resolution_scale_x_; }
|
||||
uint32_t GetResolutionScaleY() const override { return resolution_scale_y_; }
|
||||
|
||||
bool Update(bool is_rasterization_done,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
|
@ -85,7 +85,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
// register values, and also clears the render targets if needed. Must be in a
|
||||
// frame for calling.
|
||||
bool Resolve(const Memory& memory, D3D12SharedMemory& shared_memory,
|
||||
TextureCache& texture_cache, uint32_t& written_address_out,
|
||||
D3D12TextureCache& texture_cache, uint32_t& written_address_out,
|
||||
uint32_t& written_length_out);
|
||||
|
||||
// Returns true if any downloads were submitted to the command processor.
|
||||
|
@ -164,8 +164,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
bool bindless_resources_used_;
|
||||
|
||||
Path path_ = Path::kHostRenderTargets;
|
||||
uint32_t resolution_scale_x_ = 1;
|
||||
uint32_t resolution_scale_y_ = 1;
|
||||
|
||||
// For host render targets, an EDRAM-sized scratch buffer for:
|
||||
// - Guest render target data copied from host render targets during copying
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,592 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D12_D3D12_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_D3D12_D3D12_TEXTURE_CACHE_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
#include "xenia/ui/d3d12/d3d12_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
||||
class D3D12CommandProcessor;
|
||||
|
||||
class D3D12TextureCache final : public TextureCache {
|
||||
public:
|
||||
// Keys that can be stored for checking validity whether descriptors for host
|
||||
// shader bindings are up to date.
|
||||
struct TextureSRVKey {
|
||||
TextureKey key;
|
||||
uint32_t host_swizzle;
|
||||
uint8_t swizzled_signs;
|
||||
};
|
||||
|
||||
// Sampler parameters that can be directly converted to a host sampler or used
|
||||
// for binding checking validity whether samplers are up to date.
|
||||
union SamplerParameters {
|
||||
uint32_t value;
|
||||
struct {
|
||||
xenos::ClampMode clamp_x : 3; // 3
|
||||
xenos::ClampMode clamp_y : 3; // 6
|
||||
xenos::ClampMode clamp_z : 3; // 9
|
||||
xenos::BorderColor border_color : 2; // 11
|
||||
// For anisotropic, these are true.
|
||||
uint32_t mag_linear : 1; // 12
|
||||
uint32_t min_linear : 1; // 13
|
||||
uint32_t mip_linear : 1; // 14
|
||||
xenos::AnisoFilter aniso_filter : 3; // 17
|
||||
uint32_t mip_min_level : 4; // 21
|
||||
// Maximum mip level is in the texture resource itself.
|
||||
};
|
||||
|
||||
SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); }
|
||||
bool operator==(const SamplerParameters& parameters) const {
|
||||
return value == parameters.value;
|
||||
}
|
||||
bool operator!=(const SamplerParameters& parameters) const {
|
||||
return value != parameters.value;
|
||||
}
|
||||
};
|
||||
|
||||
static std::unique_ptr<D3D12TextureCache> Create(
|
||||
const RegisterFile& register_file, D3D12SharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
|
||||
D3D12CommandProcessor& command_processor, bool bindless_resources_used) {
|
||||
std::unique_ptr<D3D12TextureCache> texture_cache(new D3D12TextureCache(
|
||||
register_file, shared_memory, draw_resolution_scale_x,
|
||||
draw_resolution_scale_y, command_processor, bindless_resources_used));
|
||||
if (!texture_cache->Initialize()) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::move(texture_cache);
|
||||
}
|
||||
|
||||
~D3D12TextureCache();
|
||||
|
||||
void ClearCache();
|
||||
|
||||
void BeginSubmission(uint64_t new_submission_index) override;
|
||||
void BeginFrame() override;
|
||||
void EndFrame();
|
||||
|
||||
// Must be called within a submission - creates and untiles textures needed by
|
||||
// shaders and puts them in the SRV state. This may bind compute pipelines
|
||||
// (notifying the command processor about that), so this must be called before
|
||||
// binding the actual drawing pipeline.
|
||||
void RequestTextures(uint32_t used_texture_mask) override;
|
||||
|
||||
// Returns whether texture SRV keys stored externally are still valid for the
|
||||
// current bindings and host shader binding layout. Both keys and
|
||||
// host_shader_bindings must have host_shader_binding_count elements
|
||||
// (otherwise they are incompatible - like if this function returned false).
|
||||
bool AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
size_t host_shader_binding_count) const;
|
||||
// Exports the current binding data to texture SRV keys so they can be stored
|
||||
// for checking whether subsequent draw calls can keep using the same
|
||||
// bindings. Write host_shader_binding_count keys.
|
||||
void WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
size_t host_shader_binding_count) const;
|
||||
void WriteActiveTextureBindfulSRV(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
uint32_t GetActiveTextureBindlessSRVIndex(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding);
|
||||
|
||||
SamplerParameters GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const;
|
||||
void WriteSampler(SamplerParameters parameters,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) const;
|
||||
|
||||
// Returns whether the actual scale is not smaller than the requested one.
|
||||
static bool ClampDrawResolutionScaleToMaxSupported(
|
||||
uint32_t& scale_x, uint32_t& scale_y,
|
||||
const ui::d3d12::D3D12Provider& provider);
|
||||
// Ensures the tiles backing the range in the buffers are allocated.
|
||||
bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled) override;
|
||||
// Makes the specified range of up to 1-2 GB currently accessible on the GPU.
|
||||
// One draw call can access only at most one range - the same memory is
|
||||
// accessible through different buffers based on the range needed, so aliasing
|
||||
// barriers are required.
|
||||
bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled);
|
||||
// These functions create a view of the range specified in the last successful
|
||||
// MakeScaledResolveRangeCurrent call because that function must be called
|
||||
// before this.
|
||||
void CreateCurrentScaledResolveRangeUintPow2SRV(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2);
|
||||
void CreateCurrentScaledResolveRangeUintPow2UAV(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2);
|
||||
void TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATES new_state);
|
||||
void MarkCurrentScaledResolveRangeUAVWritesCommitNeeded() {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
GetCurrentScaledResolveBuffer().SetUAVBarrierPending();
|
||||
}
|
||||
|
||||
// Returns the ID3D12Resource of the front buffer texture (in
|
||||
// PIXEL_SHADER_RESOURCE state), or nullptr in case of failure, and writes the
|
||||
// description of its SRV. May call LoadTextureData, so the same restrictions
|
||||
// (such as about descriptor heap change possibility) apply.
|
||||
ID3D12Resource* RequestSwapTexture(
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
|
||||
xenos::TextureFormat& format_out);
|
||||
|
||||
protected:
|
||||
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
|
||||
bool IsScaledResolveSupportedForFormat(TextureKey key) const override;
|
||||
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
|
||||
|
||||
uint32_t GetMaxHostTextureWidthHeight(
|
||||
xenos::DataDimension dimension) const override;
|
||||
uint32_t GetMaxHostTextureDepthOrArraySize(
|
||||
xenos::DataDimension dimension) const override;
|
||||
|
||||
std::unique_ptr<Texture> CreateTexture(TextureKey key) override;
|
||||
|
||||
// This binds pipelines, allocates descriptors, and copies!
|
||||
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
|
||||
bool load_mips) override;
|
||||
|
||||
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
|
||||
|
||||
private:
|
||||
enum class LoadMode {
|
||||
k8bpb,
|
||||
k16bpb,
|
||||
k32bpb,
|
||||
k64bpb,
|
||||
k128bpb,
|
||||
kR5G5B5A1ToB5G5R5A1,
|
||||
kR5G6B5ToB5G6R5,
|
||||
kR5G5B6ToB5G6R5WithRBGASwizzle,
|
||||
kR4G4B4A4ToB4G4R4A4,
|
||||
kR10G11B11ToRGBA16,
|
||||
kR10G11B11ToRGBA16SNorm,
|
||||
kR11G11B10ToRGBA16,
|
||||
kR11G11B10ToRGBA16SNorm,
|
||||
kDXT1ToRGBA8,
|
||||
kDXT3ToRGBA8,
|
||||
kDXT5ToRGBA8,
|
||||
kDXNToRG8,
|
||||
kDXT3A,
|
||||
kDXT3AAs1111ToBGRA4,
|
||||
kDXT5AToR8,
|
||||
kCTX1,
|
||||
kDepthUnorm,
|
||||
kDepthFloat,
|
||||
|
||||
kCount,
|
||||
|
||||
kUnknown = kCount
|
||||
};
|
||||
|
||||
struct LoadModeInfo {
|
||||
// Shader without resolution scaling.
|
||||
const void* shader;
|
||||
size_t shader_size;
|
||||
// Shader with resolution scaling, if available. These shaders are separate
|
||||
// so the majority of the textures are not affected by the code needed for
|
||||
// resolution scale support, and also to check if the format allows
|
||||
// resolution scaling.
|
||||
const void* shader_scaled;
|
||||
size_t shader_scaled_size;
|
||||
// Log2 of the sizes, in bytes, of the source (guest) SRV and the
|
||||
// destination (host) UAV accessed by the copying shader, since the shader
|
||||
// may copy multiple blocks per one invocation.
|
||||
uint32_t srv_bpe_log2;
|
||||
uint32_t uav_bpe_log2;
|
||||
// Number of host blocks (or texels for uncompressed) along X axis written
|
||||
// by every compute shader thread - rows in the upload buffer are padded to
|
||||
// at least this amount.
|
||||
uint32_t host_x_blocks_per_thread;
|
||||
};
|
||||
|
||||
struct HostFormat {
|
||||
// Format info for the regular case.
|
||||
// DXGI format (typeless when different signedness or number representation
|
||||
// is used) for the texture resource.
|
||||
DXGI_FORMAT dxgi_format_resource;
|
||||
// DXGI format for unsigned normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_unorm;
|
||||
// The regular load mode, used when special modes (like signed-specific or
|
||||
// decompressing) aren't needed.
|
||||
LoadMode load_mode;
|
||||
// DXGI format for signed normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_snorm;
|
||||
// If the signed version needs a different bit representation on the host,
|
||||
// this is the load mode for the signed version. Otherwise the regular
|
||||
// load_mode will be used for the signed version, and a single copy will be
|
||||
// created if both unsigned and signed are used.
|
||||
LoadMode load_mode_snorm;
|
||||
|
||||
// Do NOT add integer DXGI formats to this - they are not filterable, can
|
||||
// only be read with Load, not Sample! If any game is seen using num_format
|
||||
// 1 for fixed-point formats (for floating-point, it's normally set to 1
|
||||
// though), add a constant buffer containing multipliers for the
|
||||
// textures and multiplication to the tfetch implementation.
|
||||
|
||||
// Whether the DXGI format, if not uncompressing the texture, consists of
|
||||
// blocks, thus copy regions must be aligned to block size.
|
||||
bool dxgi_format_block_aligned;
|
||||
// Uncompression info for when the regular host format for this texture is
|
||||
// block-compressed, but the size is not block-aligned, and thus such
|
||||
// texture cannot be created in Direct3D on PC and needs decompression,
|
||||
// however, such textures are common, for instance, in 4D5307E6. This only
|
||||
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
|
||||
// used for DXN and DXT5A.
|
||||
DXGI_FORMAT dxgi_format_uncompressed;
|
||||
LoadMode decompress_mode;
|
||||
|
||||
// Mapping of Xenos swizzle components to DXGI format components.
|
||||
uint32_t swizzle;
|
||||
};
|
||||
|
||||
class D3D12Texture final : public Texture {
|
||||
public:
|
||||
D3D12Texture(D3D12TextureCache& texture_cache, const TextureKey& key,
|
||||
ID3D12Resource* resource,
|
||||
D3D12_RESOURCE_STATES resource_state);
|
||||
~D3D12Texture();
|
||||
|
||||
ID3D12Resource* resource() const { return resource_.Get(); }
|
||||
|
||||
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
|
||||
D3D12_RESOURCE_STATES old_state = resource_state_;
|
||||
resource_state_ = new_state;
|
||||
return old_state;
|
||||
}
|
||||
|
||||
uint32_t GetSRVDescriptorIndex(uint32_t descriptor_key) const {
|
||||
auto it = srv_descriptors_.find(descriptor_key);
|
||||
return it != srv_descriptors_.cend() ? it->second : UINT32_MAX;
|
||||
}
|
||||
|
||||
void AddSRVDescriptorIndex(uint32_t descriptor_key,
|
||||
uint32_t descriptor_index) {
|
||||
srv_descriptors_.emplace(descriptor_key, descriptor_index);
|
||||
}
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
|
||||
D3D12_RESOURCE_STATES resource_state_;
|
||||
|
||||
// For bindful - indices in the non-shader-visible descriptor cache for
|
||||
// copying to the shader-visible heap (much faster than recreating, which,
|
||||
// according to profiling, was often a bottleneck in many games).
|
||||
// For bindless - indices in the global shader-visible descriptor heap.
|
||||
std::unordered_map<uint32_t, uint32_t> srv_descriptors_;
|
||||
};
|
||||
|
||||
static constexpr uint32_t kSRVDescriptorCachePageSize = 65536;
|
||||
|
||||
struct SRVDescriptorCachePage {
|
||||
public:
|
||||
explicit SRVDescriptorCachePage(ID3D12DescriptorHeap* heap)
|
||||
: heap_(heap),
|
||||
heap_start_(heap->GetCPUDescriptorHandleForHeapStart()) {}
|
||||
SRVDescriptorCachePage(const SRVDescriptorCachePage& page) = delete;
|
||||
SRVDescriptorCachePage& operator=(const SRVDescriptorCachePage& page) =
|
||||
delete;
|
||||
SRVDescriptorCachePage(SRVDescriptorCachePage&& page) {
|
||||
std::swap(heap_, page.heap_);
|
||||
std::swap(heap_start_, page.heap_start_);
|
||||
}
|
||||
SRVDescriptorCachePage& operator=(SRVDescriptorCachePage&& page) {
|
||||
std::swap(heap_, page.heap_);
|
||||
std::swap(heap_start_, page.heap_start_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
ID3D12DescriptorHeap* heap() const { return heap_.Get(); }
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE heap_start() const { return heap_start_; }
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap_;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE heap_start_;
|
||||
};
|
||||
|
||||
struct D3D12TextureBinding {
|
||||
// Descriptor indices of texture and texture_signed of the respective
|
||||
// TextureBinding returned from FindOrCreateTextureDescriptor.
|
||||
uint32_t descriptor_index;
|
||||
uint32_t descriptor_index_signed;
|
||||
|
||||
D3D12TextureBinding() { Reset(); }
|
||||
|
||||
void Reset() {
|
||||
descriptor_index = UINT32_MAX;
|
||||
descriptor_index_signed = UINT32_MAX;
|
||||
}
|
||||
};
|
||||
|
||||
class ScaledResolveVirtualBuffer {
|
||||
public:
|
||||
ScaledResolveVirtualBuffer(ID3D12Resource* resource,
|
||||
D3D12_RESOURCE_STATES resource_state)
|
||||
: resource_(resource), resource_state_(resource_state) {}
|
||||
ID3D12Resource* resource() const { return resource_.Get(); }
|
||||
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
|
||||
D3D12_RESOURCE_STATES old_state = resource_state_;
|
||||
if (old_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
uav_barrier_pending_ = false;
|
||||
}
|
||||
resource_state_ = new_state;
|
||||
return old_state;
|
||||
}
|
||||
// After writing through a UAV.
|
||||
void SetUAVBarrierPending() {
|
||||
if (resource_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
uav_barrier_pending_ = true;
|
||||
}
|
||||
}
|
||||
// After an aliasing barrier (which is even stronger than an UAV barrier).
|
||||
void ClearUAVBarrierPending() { uav_barrier_pending_ = false; }
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
|
||||
D3D12_RESOURCE_STATES resource_state_;
|
||||
bool uav_barrier_pending_ = false;
|
||||
};
|
||||
|
||||
D3D12TextureCache(const RegisterFile& register_file,
|
||||
D3D12SharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
D3D12CommandProcessor& command_processor,
|
||||
bool bindless_resources_used);
|
||||
|
||||
bool Initialize();
|
||||
|
||||
// Whether decompression is needed on the host (Direct3D only allows creation
|
||||
// of block-compressed textures with 4x4-aligned dimensions on PC).
|
||||
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
|
||||
uint32_t height);
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_resource;
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
|
||||
return GetDXGIResourceFormat(key.format, key.GetWidth(), key.GetHeight());
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_unorm;
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
|
||||
return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight());
|
||||
}
|
||||
|
||||
static LoadMode GetLoadMode(TextureKey key);
|
||||
|
||||
static constexpr bool AreDimensionsCompatible(
|
||||
xenos::FetchOpDimension binding_dimension,
|
||||
xenos::DataDimension resource_dimension) {
|
||||
switch (binding_dimension) {
|
||||
case xenos::FetchOpDimension::k1D:
|
||||
case xenos::FetchOpDimension::k2D:
|
||||
return resource_dimension == xenos::DataDimension::k1D ||
|
||||
resource_dimension == xenos::DataDimension::k2DOrStacked;
|
||||
case xenos::FetchOpDimension::k3DOrStacked:
|
||||
return resource_dimension == xenos::DataDimension::k3D;
|
||||
case xenos::FetchOpDimension::kCube:
|
||||
return resource_dimension == xenos::DataDimension::kCube;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the index of an existing of a newly created non-shader-visible
|
||||
// cached (for bindful) or a shader-visible global (for bindless) descriptor,
|
||||
// or UINT32_MAX if failed to create.
|
||||
uint32_t FindOrCreateTextureDescriptor(D3D12Texture& texture, bool is_signed,
|
||||
uint32_t host_swizzle);
|
||||
void ReleaseTextureDescriptor(uint32_t descriptor_index);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE GetTextureDescriptorCPUHandle(
|
||||
uint32_t descriptor_index) const;
|
||||
|
||||
size_t GetScaledResolveBufferCount() const {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
// Make sure any range up to 1 GB is accessible through 1 or 2 buffers.
|
||||
// 2x2 scale buffers - just one 2 GB buffer for all 2 GB.
|
||||
// 3x3 scale buffers - 4 buffers:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |___________________|___________________|
|
||||
// |___________________|______________|
|
||||
// Buffer N has an offset of N * 1 GB in the scaled resolve address space.
|
||||
// The logic is:
|
||||
// - 2 GB can be accessed through a [0 GB ... 2 GB) buffer - only need one.
|
||||
// - 2.1 GB needs [0 GB ... 2 GB) and [1 GB ... 2.1 GB) - two buffers.
|
||||
// - 3 GB needs [0 GB ... 2 GB) and [1 GB ... 3 GB) - two buffers.
|
||||
// - 3.1 GB needs [0 GB ... 2 GB), [1 GB ... 3 GB) and [2 GB ... 3.1 GB) -
|
||||
// three buffers.
|
||||
uint64_t address_space_size =
|
||||
uint64_t(SharedMemory::kBufferSize) *
|
||||
(draw_resolution_scale_x() * draw_resolution_scale_y());
|
||||
return size_t((address_space_size - 1) >> 30);
|
||||
}
|
||||
// Returns indices of two scaled resolve virtual buffers that the location in
|
||||
// memory may be accessible through. May be the same if it's a location near
|
||||
// the beginning or the end of the address represented only by one buffer.
|
||||
std::array<size_t, 2> GetPossibleScaledResolveBufferIndices(
|
||||
uint64_t address_scaled) const {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
size_t address_gb = size_t(address_scaled >> 30);
|
||||
size_t max_index = GetScaledResolveBufferCount() - 1;
|
||||
// In different cases for 3x3:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |12________2________|1_________2________|
|
||||
// |1_________2________|1_________12__|
|
||||
return std::array<size_t, 2>{
|
||||
std::min(address_gb, max_index),
|
||||
std::min(std::max(address_gb, size_t(1)) - size_t(1), max_index)};
|
||||
}
|
||||
// The index is also the gigabyte offset of the buffer from the start of the
|
||||
// scaled physical memory address space.
|
||||
size_t GetCurrentScaledResolveBufferIndex() const {
|
||||
return scaled_resolve_1gb_buffer_indices_
|
||||
[scaled_resolve_current_range_start_scaled_ >> 30];
|
||||
}
|
||||
ScaledResolveVirtualBuffer& GetCurrentScaledResolveBuffer() {
|
||||
ScaledResolveVirtualBuffer* scaled_resolve_buffer =
|
||||
scaled_resolve_2gb_buffers_[GetCurrentScaledResolveBufferIndex()].get();
|
||||
assert_not_null(scaled_resolve_buffer);
|
||||
return *scaled_resolve_buffer;
|
||||
}
|
||||
|
||||
static const HostFormat host_formats_[64];
|
||||
|
||||
D3D12CommandProcessor& command_processor_;
|
||||
bool bindless_resources_used_;
|
||||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
Microsoft::WRL::ComPtr<ID3D12RootSignature> load_root_signature_;
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>,
|
||||
size_t(LoadMode::kCount)>
|
||||
load_pipelines_;
|
||||
// Load pipelines for resolution-scaled resolve targets.
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>,
|
||||
size_t(LoadMode::kCount)>
|
||||
load_pipelines_scaled_;
|
||||
|
||||
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;
|
||||
uint32_t srv_descriptor_cache_allocated_;
|
||||
// Indices of cached descriptors used by deleted textures, for reuse.
|
||||
std::vector<uint32_t> srv_descriptor_cache_free_;
|
||||
|
||||
enum class NullSRVDescriptorIndex {
|
||||
k2DArray,
|
||||
k3D,
|
||||
kCube,
|
||||
|
||||
kCount,
|
||||
};
|
||||
// Contains null SRV descriptors of dimensions from NullSRVDescriptorIndex.
|
||||
// For copying, not shader-visible.
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> null_srv_descriptor_heap_;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_descriptor_heap_start_;
|
||||
|
||||
std::array<D3D12TextureBinding, xenos::kTextureFetchConstantCount>
|
||||
d3d12_texture_bindings_;
|
||||
|
||||
// Unsupported texture formats used during this frame (for research and
|
||||
// testing).
|
||||
enum : uint8_t {
|
||||
kUnsupportedResourceBit = 1,
|
||||
kUnsupportedUnormBit = kUnsupportedResourceBit << 1,
|
||||
kUnsupportedSnormBit = kUnsupportedUnormBit << 1,
|
||||
};
|
||||
uint8_t unsupported_format_features_used_[64];
|
||||
|
||||
// The tiled buffer for resolved data with resolution scaling.
|
||||
// Because on Direct3D 12 (at least on Windows 10 2004) typed SRV or UAV
|
||||
// creation fails for offsets above 4 GB, a single tiled 4.5 GB buffer can't
|
||||
// be used for 3x3 resolution scaling.
|
||||
// Instead, "sliding window" buffers allowing to access a single range of up
|
||||
// to 1 GB (or up to 2 GB, depending on the low bits) at any moment are used.
|
||||
// Parts of 4.5 GB address space can be accessed through 2 GB buffers as:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |___________________|___________________| or
|
||||
// |___________________|______________|
|
||||
// (2 GB is also the amount of scaled physical memory with 2x resolution
|
||||
// scale, and older Intel GPUs, while support tiled resources, only support 31
|
||||
// virtual address bits per resource).
|
||||
// Index is first gigabyte. Only including buffers containing over 1 GB
|
||||
// (because otherwise the data will be fully contained in another).
|
||||
// Size is calculated the same as in GetScaledResolveBufferCount.
|
||||
std::array<std::unique_ptr<ScaledResolveVirtualBuffer>,
|
||||
(uint64_t(SharedMemory::kBufferSize) *
|
||||
(kMaxDrawResolutionScaleAlongAxis *
|
||||
kMaxDrawResolutionScaleAlongAxis) -
|
||||
1) /
|
||||
(UINT32_C(1) << 30)>
|
||||
scaled_resolve_2gb_buffers_;
|
||||
// Not very big heaps (16 MB) because they are needed pretty sparsely. One
|
||||
// 2x-scaled 1280x720x32bpp texture is slighly bigger than 14 MB.
|
||||
static constexpr uint32_t kScaledResolveHeapSizeLog2 = 24;
|
||||
static constexpr uint32_t kScaledResolveHeapSize =
|
||||
uint32_t(1) << kScaledResolveHeapSizeLog2;
|
||||
static_assert(
|
||||
(kScaledResolveHeapSize % D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) == 0,
|
||||
"Scaled resolve heap size must be a multiple of Direct3D tile size");
|
||||
static_assert(
|
||||
kScaledResolveHeapSizeLog2 <= SharedMemory::kBufferSizeLog2,
|
||||
"Scaled resolve heaps are assumed to be wholly mappable irrespective of "
|
||||
"resolution scale, never truncated, for example, if the scaled resolve "
|
||||
"address space is 4.5 GB, but the heap size is 1 GB");
|
||||
static_assert(
|
||||
kScaledResolveHeapSizeLog2 <= 30,
|
||||
"Scaled resolve heaps are assumed to only be wholly mappable to up to "
|
||||
"two 2 GB buffers");
|
||||
// Resident portions of the tiled buffer.
|
||||
std::vector<Microsoft::WRL::ComPtr<ID3D12Heap>> scaled_resolve_heaps_;
|
||||
// Number of currently resident portions of the tiled buffer, for profiling.
|
||||
uint32_t scaled_resolve_heap_count_ = 0;
|
||||
// Current scaled resolve state.
|
||||
// For aliasing barrier placement, last owning buffer index for each of 1 GB.
|
||||
size_t
|
||||
scaled_resolve_1gb_buffer_indices_[(uint64_t(SharedMemory::kBufferSize) *
|
||||
kMaxDrawResolutionScaleAlongAxis *
|
||||
kMaxDrawResolutionScaleAlongAxis +
|
||||
((uint32_t(1) << 30) - 1)) >>
|
||||
30];
|
||||
// Range used in the last successful MakeScaledResolveRangeCurrent call.
|
||||
uint64_t scaled_resolve_current_range_start_scaled_;
|
||||
uint64_t scaled_resolve_current_range_length_scaled_;
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_D3D12_D3D12_TEXTURE_CACHE_H_
|
|
@ -98,8 +98,8 @@ PipelineCache::PipelineCache(D3D12CommandProcessor& command_processor,
|
|||
provider.GetAdapterVendorID(), bindless_resources_used_, edram_rov_used,
|
||||
render_target_cache_.gamma_render_target_as_srgb(),
|
||||
render_target_cache_.msaa_2x_supported(),
|
||||
render_target_cache_.GetResolutionScaleX(),
|
||||
render_target_cache_.GetResolutionScaleY(),
|
||||
render_target_cache_.draw_resolution_scale_x(),
|
||||
render_target_cache_.draw_resolution_scale_y(),
|
||||
provider.GetGraphicsAnalysis() != nullptr);
|
||||
|
||||
if (edram_rov_used) {
|
||||
|
@ -426,8 +426,8 @@ void PipelineCache::InitializeShaderStorage(
|
|||
provider.GetAdapterVendorID(), bindless_resources_used_,
|
||||
edram_rov_used, render_target_cache_.gamma_render_target_as_srgb(),
|
||||
render_target_cache_.msaa_2x_supported(),
|
||||
render_target_cache_.GetResolutionScaleX(),
|
||||
render_target_cache_.GetResolutionScaleY(),
|
||||
render_target_cache_.draw_resolution_scale_x(),
|
||||
render_target_cache_.draw_resolution_scale_y(),
|
||||
provider.GetGraphicsAnalysis() != nullptr);
|
||||
// If needed and possible, create objects needed for DXIL conversion and
|
||||
// disassembly on this thread.
|
||||
|
@ -3001,8 +3001,8 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
|||
// more likely.
|
||||
state_desc.RasterizerState.SlopeScaledDepthBias =
|
||||
description.depth_bias_slope_scaled *
|
||||
float(std::max(render_target_cache_.GetResolutionScaleX(),
|
||||
render_target_cache_.GetResolutionScaleY()));
|
||||
float(std::max(render_target_cache_.draw_resolution_scale_x(),
|
||||
render_target_cache_.draw_resolution_scale_y()));
|
||||
state_desc.RasterizerState.DepthClipEnable =
|
||||
description.depth_clip ? TRUE : FALSE;
|
||||
uint32_t msaa_sample_count = uint32_t(1)
|
||||
|
|
|
@ -1,887 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
#include "xenia/ui/d3d12/d3d12_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
||||
class D3D12CommandProcessor;
|
||||
|
||||
// Manages host copies of guest textures, performing untiling, format and endian
|
||||
// conversion of textures stored in the shared memory, and also handling
|
||||
// invalidation.
|
||||
//
|
||||
// Mipmaps are treated the following way, according to the GPU hang message
|
||||
// found in game executables explaining the valid usage of BaseAddress when
|
||||
// streaming the largest LOD (it says games should not use 0 as the base address
|
||||
// when the largest LOD isn't loaded, but rather, either allocate a valid
|
||||
// address for it or make it the same as mip_address):
|
||||
// - If the texture has a base address, but no mip address, it's not mipmapped -
|
||||
// the host texture has only the largest level too.
|
||||
// - If the texture has different non-zero base address and mip address, a host
|
||||
// texture with mip_max_level+1 mipmaps is created - mip_min_level is ignored
|
||||
// and treated purely as sampler state because there are tfetch instructions
|
||||
// working directly with LOD values - including fetching with an explicit LOD.
|
||||
// However, the max level is not ignored because any mip count can be
|
||||
// specified when creating a texture, and another texture may be placed after
|
||||
// the last one.
|
||||
// - If the texture has a mip address, but the base address is 0 or the same as
|
||||
// the mip address, a mipmapped texture is created, but min/max LOD is clamped
|
||||
// to the lower bound of 1 - the game is expected to do that anyway until the
|
||||
// largest LOD is loaded.
|
||||
// TODO(Triang3l): Attach the largest LOD to existing textures with a valid
|
||||
// mip_address but no base ever used yet (no base_address) to save memory
|
||||
// because textures are streamed this way anyway.
|
||||
class TextureCache {
|
||||
struct TextureKey {
|
||||
// Dimensions minus 1 are stored similarly to how they're stored in fetch
|
||||
// constants so fewer bits can be used, while the maximum size (8192 for 2D)
|
||||
// can still be encoded (a 8192x sky texture is used in 4D530910).
|
||||
|
||||
// Physical 4 KB page with the base mip level, disregarding A/C/E address
|
||||
// range prefix.
|
||||
uint32_t base_page : 17; // 17 total
|
||||
xenos::DataDimension dimension : 2; // 19
|
||||
uint32_t width_minus_1 : 13; // 32
|
||||
|
||||
uint32_t height_minus_1 : 13; // 45
|
||||
uint32_t tiled : 1; // 46
|
||||
uint32_t packed_mips : 1; // 47
|
||||
// Physical 4 KB page with mip 1 and smaller.
|
||||
uint32_t mip_page : 17; // 64
|
||||
|
||||
// (Layers for stacked and 3D, 6 for cube, 1 for other dimensions) - 1.
|
||||
uint32_t depth_or_array_size_minus_1 : 10; // 74
|
||||
uint32_t pitch : 9; // 83
|
||||
uint32_t mip_max_level : 4; // 87
|
||||
xenos::TextureFormat format : 6; // 93
|
||||
xenos::Endian endianness : 2; // 95
|
||||
// Whether this texture is signed and has a different host representation
|
||||
// than an unsigned view of the same guest texture.
|
||||
uint32_t signed_separate : 1; // 96
|
||||
|
||||
// Whether this texture is a resolution-scaled resolve target.
|
||||
uint32_t scaled_resolve : 1; // 97
|
||||
// Least important in ==, so placed last.
|
||||
uint32_t is_valid : 1; // 98
|
||||
|
||||
TextureKey() { MakeInvalid(); }
|
||||
TextureKey(const TextureKey& key) {
|
||||
std::memcpy(this, &key, sizeof(*this));
|
||||
}
|
||||
TextureKey& operator=(const TextureKey& key) {
|
||||
std::memcpy(this, &key, sizeof(*this));
|
||||
return *this;
|
||||
}
|
||||
void MakeInvalid() {
|
||||
// Zero everything, including the padding, for a stable hash.
|
||||
std::memset(this, 0, sizeof(*this));
|
||||
}
|
||||
|
||||
uint32_t GetWidth() const { return width_minus_1 + 1; }
|
||||
uint32_t GetHeight() const { return height_minus_1 + 1; }
|
||||
uint32_t GetDepthOrArraySize() const {
|
||||
return depth_or_array_size_minus_1 + 1;
|
||||
}
|
||||
|
||||
using Hasher = xe::hash::XXHasher<TextureKey>;
|
||||
bool operator==(const TextureKey& key) const {
|
||||
return !std::memcmp(this, &key, sizeof(*this));
|
||||
}
|
||||
bool operator!=(const TextureKey& key) const { return !(*this == key); }
|
||||
};
|
||||
|
||||
public:
|
||||
// Keys that can be stored for checking validity whether descriptors for host
|
||||
// shader bindings are up to date.
|
||||
struct TextureSRVKey {
|
||||
TextureKey key;
|
||||
uint32_t host_swizzle;
|
||||
uint8_t swizzled_signs;
|
||||
};
|
||||
|
||||
// Sampler parameters that can be directly converted to a host sampler or used
|
||||
// for binding checking validity whether samplers are up to date.
|
||||
union SamplerParameters {
|
||||
uint32_t value;
|
||||
struct {
|
||||
xenos::ClampMode clamp_x : 3; // 3
|
||||
xenos::ClampMode clamp_y : 3; // 6
|
||||
xenos::ClampMode clamp_z : 3; // 9
|
||||
xenos::BorderColor border_color : 2; // 11
|
||||
// For anisotropic, these are true.
|
||||
uint32_t mag_linear : 1; // 12
|
||||
uint32_t min_linear : 1; // 13
|
||||
uint32_t mip_linear : 1; // 14
|
||||
xenos::AnisoFilter aniso_filter : 3; // 17
|
||||
uint32_t mip_min_level : 4; // 21
|
||||
// Maximum mip level is in the texture resource itself.
|
||||
};
|
||||
|
||||
SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); }
|
||||
bool operator==(const SamplerParameters& parameters) const {
|
||||
return value == parameters.value;
|
||||
}
|
||||
bool operator!=(const SamplerParameters& parameters) const {
|
||||
return value != parameters.value;
|
||||
}
|
||||
};
|
||||
|
||||
TextureCache(D3D12CommandProcessor& command_processor,
|
||||
const RegisterFile& register_file,
|
||||
D3D12SharedMemory& shared_memory, bool bindless_resources_used,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y);
|
||||
~TextureCache();
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
void ClearCache();
|
||||
|
||||
void TextureFetchConstantWritten(uint32_t index);
|
||||
|
||||
void BeginSubmission();
|
||||
void BeginFrame();
|
||||
void EndFrame();
|
||||
|
||||
// Must be called within a frame - creates and untiles textures needed by
|
||||
// shaders and puts them in the SRV state. This may bind compute pipelines
|
||||
// (notifying the command processor about that), so this must be called before
|
||||
// binding the actual drawing pipeline.
|
||||
void RequestTextures(uint32_t used_texture_mask);
|
||||
|
||||
// "ActiveTexture" means as of the latest RequestTextures call.
|
||||
|
||||
// Returns whether texture SRV keys stored externally are still valid for the
|
||||
// current bindings and host shader binding layout. Both keys and
|
||||
// host_shader_bindings must have host_shader_binding_count elements
|
||||
// (otherwise they are incompatible - like if this function returned false).
|
||||
bool AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
size_t host_shader_binding_count) const;
|
||||
// Exports the current binding data to texture SRV keys so they can be stored
|
||||
// for checking whether subsequent draw calls can keep using the same
|
||||
// bindings. Write host_shader_binding_count keys.
|
||||
void WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
size_t host_shader_binding_count) const;
|
||||
// Returns the post-swizzle signedness of a currently bound texture (must be
|
||||
// called after RequestTextures).
|
||||
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {
|
||||
return texture_bindings_[index].swizzled_signs;
|
||||
}
|
||||
bool IsActiveTextureResolved(uint32_t index) const {
|
||||
const TextureBinding& binding = texture_bindings_[index];
|
||||
if (binding.texture && binding.texture->IsResolved()) {
|
||||
return true;
|
||||
}
|
||||
if (binding.texture_signed && binding.texture_signed->IsResolved()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void WriteActiveTextureBindfulSRV(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
uint32_t GetActiveTextureBindlessSRVIndex(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding);
|
||||
|
||||
SamplerParameters GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const;
|
||||
void WriteSampler(SamplerParameters parameters,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) const;
|
||||
|
||||
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
|
||||
// In textures, resolution scaling is done for 8-byte portions of memory for
|
||||
// 8bpp textures, and for 16-byte portions for textures of higher bit depths
|
||||
// (these are the sizes of regions where contiguous texels in memory are also
|
||||
// contiguous in the texture along the horizontal axis, so 64-bit and 128-bit
|
||||
// loads / stores, for 8bpp and 16bpp+ respectively, can be used for untiling
|
||||
// regardless of the resolution scale).
|
||||
static void ClampDrawResolutionScaleToSupportedRange(
|
||||
uint32_t& scale_x, uint32_t& scale_y,
|
||||
const ui::d3d12::D3D12Provider& provider);
|
||||
uint32_t GetDrawResolutionScaleX() const { return draw_resolution_scale_x_; }
|
||||
uint32_t GetDrawResolutionScaleY() const { return draw_resolution_scale_y_; }
|
||||
bool IsDrawResolutionScaled() const {
|
||||
return draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1;
|
||||
}
|
||||
// Ensures the tiles backing the range in the buffers are allocated.
|
||||
bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled);
|
||||
// Makes the specified range of up to 1-2 GB currently accessible on the GPU.
|
||||
// One draw call can access only at most one range - the same memory is
|
||||
// accessible through different buffers based on the range needed, so aliasing
|
||||
// barriers are required.
|
||||
bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled);
|
||||
// These functions create a view of the range specified in the last successful
|
||||
// MakeScaledResolveRangeCurrent call because that function must be called
|
||||
// before this.
|
||||
void CreateCurrentScaledResolveRangeUintPow2SRV(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2);
|
||||
void CreateCurrentScaledResolveRangeUintPow2UAV(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2);
|
||||
void TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATES new_state);
|
||||
void MarkCurrentScaledResolveRangeUAVWritesCommitNeeded() {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
GetCurrentScaledResolveBuffer().SetUAVBarrierPending();
|
||||
}
|
||||
|
||||
// Returns the ID3D12Resource of the front buffer texture (in
|
||||
// PIXEL_SHADER_RESOURCE state), or nullptr in case of failure, and writes the
|
||||
// description of its SRV. May call LoadTextureData, so the same restrictions
|
||||
// (such as about descriptor heap change possibility) apply.
|
||||
ID3D12Resource* RequestSwapTexture(
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
|
||||
xenos::TextureFormat& format_out);
|
||||
|
||||
private:
|
||||
// Hard limit, originating from the half-pixel offset (two-pixel offset is too
|
||||
// much, the resolve shaders, being generic for different scales, only
|
||||
// duplicate the second pixel into the first, not the third), and also due to
|
||||
// the bit counts used for passing the scale to shaders.
|
||||
static constexpr uint32_t kMaxDrawResolutionScaleAlongAxis = 3;
|
||||
|
||||
enum class LoadMode {
|
||||
k8bpb,
|
||||
k16bpb,
|
||||
k32bpb,
|
||||
k64bpb,
|
||||
k128bpb,
|
||||
kR5G5B5A1ToB5G5R5A1,
|
||||
kR5G6B5ToB5G6R5,
|
||||
kR5G5B6ToB5G6R5WithRBGASwizzle,
|
||||
kR4G4B4A4ToB4G4R4A4,
|
||||
kR10G11B11ToRGBA16,
|
||||
kR10G11B11ToRGBA16SNorm,
|
||||
kR11G11B10ToRGBA16,
|
||||
kR11G11B10ToRGBA16SNorm,
|
||||
kDXT1ToRGBA8,
|
||||
kDXT3ToRGBA8,
|
||||
kDXT5ToRGBA8,
|
||||
kDXNToRG8,
|
||||
kDXT3A,
|
||||
kDXT3AAs1111ToBGRA4,
|
||||
kDXT5AToR8,
|
||||
kCTX1,
|
||||
kDepthUnorm,
|
||||
kDepthFloat,
|
||||
|
||||
kCount,
|
||||
|
||||
kUnknown = kCount
|
||||
};
|
||||
|
||||
struct LoadModeInfo {
|
||||
// Rules of data access in load shaders:
|
||||
// - Source reading (from the shared memory or the scaled resolve buffer):
|
||||
// - Guest data may be stored in a sparsely-allocated buffer, or, in
|
||||
// Direct3D 12 terms, a tiled buffer. This means that some regions of
|
||||
// the buffer may not be mapped. On tiled resources tier 1 hardware,
|
||||
// accesing unmapped tiles results in undefined behavior, including a
|
||||
// GPU page fault and device removal. So, shaders must not try to access
|
||||
// potentially unmapped regions (that are outside the texture memory
|
||||
// extents calculated on the CPU, taking into account that Xenia can't
|
||||
// overestimate texture sizes freely since it must not try to upload
|
||||
// unallocated pages on the CPU).
|
||||
// - Buffer tiles have 64 KB size on Direct3D 12. Vulkan has its own
|
||||
// alignment requirements for sparse binding. But overall, we're
|
||||
// allocating pretty large regions.
|
||||
// - Resolution scaling disabled:
|
||||
// - Shared memory allocates regions of power of two sizes that map
|
||||
// directly to the same portions of the 512 MB of the console's
|
||||
// physical memory. So, a 64 KB-aligned host buffer region is also 64
|
||||
// KB-aligned in the guest address space.
|
||||
// - Tiled textures: 32x32x4-block tiles are always resident each as a
|
||||
// whole. If the width is bigger than the pitch, the overflowing
|
||||
// 32x32x4 tiles are also loaded as entire tiles. We do not have
|
||||
// separate shaders for 2D and 3D. So, for tiled textures, it's safe
|
||||
// to consider that if any location within a 32x32-aligned portion is
|
||||
// within the texture bounds, the entire 32x32 portion also can be
|
||||
// read.
|
||||
// - Linear textures: Pitch is aligned to 256 bytes. Row count, however,
|
||||
// is not aligned to anything (unless the mip tail is being loaded).
|
||||
// The overflowing last row in case `width > pitch`, however, is made
|
||||
// resident up to the last texel in it. But row start alignment is
|
||||
// 256, which is a power of two, and is smaller than the Direct3D 12
|
||||
// tile size of 64 KB. So, if any block within a 256-aligned region is
|
||||
// within the texture bounds, without resolution scaling, reading from
|
||||
// any location in that 256-aligned region is safe.
|
||||
// - Since we use the same shaders for tiled and linear textures (as
|
||||
// well as 1D textures), this means that without resolution scaling,
|
||||
// it's safe to access a min(256 bytes, 32 blocks)-aligned portion
|
||||
// along X, but only within the same row of blocks, with bounds
|
||||
// checking only for such portion as a whole, but without additional
|
||||
// bounds checking inside of it.
|
||||
// - Therefore, it's recommended that shaders read power-of-two amounts
|
||||
// of blocks (so there will naturally be some alignment to some power
|
||||
// of two), and this way, each thread may read at most 16 16bpb blocks
|
||||
// or at most 32 8bpb or smaller blocks with in a single
|
||||
// `if (x < width)` for the whole aligned range of the same length.
|
||||
// - Resolution scaling enabled:
|
||||
// - For simplicity, unlike in the shared memory, buffer tile boundaries
|
||||
// are not aligned to powers of 2 the same way as guest addresses are.
|
||||
// While for 2x2 resolution scaling it still happens to be the case
|
||||
// because `host scaling unit address = guest scaling unit
|
||||
// address << 2` (similarly for 2x1 and 1x2), for 3x or x3, it's not -
|
||||
// a 64 KB host tile would represent 7281.777 guest bytes with 3x3
|
||||
// (disregarding that sequences of texels that are adjacent in memory
|
||||
// alongside the horizontal axis, not individual bytes, are scaled,
|
||||
// but even in that case it's not scaling by 2^n still).
|
||||
// - The above would affect the `width > pitch` case for linear
|
||||
// textures, requiring overestimating the width in calculation of the
|
||||
// range of the tiles to map, while not doing this overestimation on
|
||||
// the guest memory extent calculation side (otherwise it may result
|
||||
// in attempting to upload unallocated memory on the CPU). For
|
||||
// example, let's take look at an extreme case of a 369x28 k_8 texture
|
||||
// with pitch of 256 bytes. The last row, in guest memory, would be
|
||||
// loaded from the [7168, 7281) range, or, with 3x3 resolution
|
||||
// scaling, from bytes [64512, 65529). However, if we try to
|
||||
// unconditionally load 2 pixels, like the texture is 370x28, we will
|
||||
// be accessing the bytes [64512, 65538). But bytes 65536 and 65537
|
||||
// will be in another 64 KB tile, which may be not mapped yet.
|
||||
// However, none of this is an issue for one simple reason - resolving
|
||||
// is only possible to tiled textures, so linear textures will never
|
||||
// be resolution-scaled.
|
||||
// - Tiled textures have potentially referenced guest 32x32-block tiles
|
||||
// loaded in their entirety. So, just like for unscaled textures, if
|
||||
// any block within a tile is available, the entire tile is as well.
|
||||
// - Destination writing (to the linear buffer):
|
||||
// - host_x_blocks_per_thread specifies how many pixels can be written
|
||||
// without bounds checking within increments of that amount - the pitch
|
||||
// of the destination buffer is manually overaligned if needed.
|
||||
// Shader without resolution scaling.
|
||||
const void* shader;
|
||||
size_t shader_size;
|
||||
// Shader with resolution scaling, if available. These shaders are separate
|
||||
// so the majority of the textures are not affected by the code needed for
|
||||
// resolution scale support, and also to check if the format allows
|
||||
// resolution scaling.
|
||||
const void* shader_scaled;
|
||||
size_t shader_scaled_size;
|
||||
// Log2 of the sizes, in bytes, of the source (guest) SRV and the
|
||||
// destination (host) UAV accessed by the copying shader, since the shader
|
||||
// may copy multiple blocks per one invocation.
|
||||
uint32_t srv_bpe_log2;
|
||||
uint32_t uav_bpe_log2;
|
||||
// Number of host blocks (or texels for uncompressed) along X axis written
|
||||
// by every compute shader thread - rows in the upload buffer are padded to
|
||||
// at least this amount.
|
||||
uint32_t host_x_blocks_per_thread;
|
||||
};
|
||||
|
||||
struct HostFormat {
|
||||
// Format info for the regular case.
|
||||
// DXGI format (typeless when different signedness or number representation
|
||||
// is used) for the texture resource.
|
||||
DXGI_FORMAT dxgi_format_resource;
|
||||
// DXGI format for unsigned normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_unorm;
|
||||
// The regular load mode, used when special modes (like signed-specific or
|
||||
// decompressing) aren't needed.
|
||||
LoadMode load_mode;
|
||||
// DXGI format for signed normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_snorm;
|
||||
// If the signed version needs a different bit representation on the host,
|
||||
// this is the load mode for the signed version. Otherwise the regular
|
||||
// load_mode will be used for the signed version, and a single copy will be
|
||||
// created if both unsigned and signed are used.
|
||||
LoadMode load_mode_snorm;
|
||||
|
||||
// Do NOT add integer DXGI formats to this - they are not filterable, can
|
||||
// only be read with Load, not Sample! If any game is seen using num_format
|
||||
// 1 for fixed-point formats (for floating-point, it's normally set to 1
|
||||
// though), add a constant buffer containing multipliers for the
|
||||
// textures and multiplication to the tfetch implementation.
|
||||
|
||||
// Whether the DXGI format, if not uncompressing the texture, consists of
|
||||
// blocks, thus copy regions must be aligned to block size.
|
||||
bool dxgi_format_block_aligned;
|
||||
// Uncompression info for when the regular host format for this texture is
|
||||
// block-compressed, but the size is not block-aligned, and thus such
|
||||
// texture cannot be created in Direct3D on PC and needs decompression,
|
||||
// however, such textures are common, for instance, in 4D5307E6. This only
|
||||
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
|
||||
// used for DXN and DXT5A.
|
||||
DXGI_FORMAT dxgi_format_uncompressed;
|
||||
LoadMode decompress_mode;
|
||||
|
||||
// Mapping of Xenos swizzle components to DXGI format components.
|
||||
uint8_t swizzle[4];
|
||||
};
|
||||
|
||||
struct Texture {
|
||||
TextureKey key;
|
||||
ID3D12Resource* resource;
|
||||
uint64_t resource_size;
|
||||
D3D12_RESOURCE_STATES state;
|
||||
// Whether the most up-to-date base / mips contain pages with data from a
|
||||
// resolve operation (rather than from the CPU or memexport), primarily for
|
||||
// choosing between piecewise linear gamma and sRGB when the former is
|
||||
// emulated with the latter.
|
||||
bool base_resolved;
|
||||
bool mips_resolved;
|
||||
|
||||
uint64_t last_usage_frame;
|
||||
uint64_t last_usage_time;
|
||||
Texture* used_previous;
|
||||
Texture* used_next;
|
||||
|
||||
texture_util::TextureGuestLayout guest_layout;
|
||||
|
||||
// For bindful - indices in the non-shader-visible descriptor cache for
|
||||
// copying to the shader-visible heap (much faster than recreating, which,
|
||||
// according to profiling, was often a bottleneck in many games).
|
||||
// For bindless - indices in the global shader-visible descriptor heap.
|
||||
std::unordered_map<uint32_t, uint32_t> srv_descriptors;
|
||||
|
||||
// These are to be accessed within the global critical region to synchronize
|
||||
// with shared memory.
|
||||
// Watch handles for the memory ranges.
|
||||
SharedMemory::WatchHandle base_watch_handle;
|
||||
SharedMemory::WatchHandle mip_watch_handle;
|
||||
// Whether the recent base level data has been loaded from the memory.
|
||||
bool base_in_sync;
|
||||
// Whether the recent mip data has been loaded from the memory.
|
||||
bool mips_in_sync;
|
||||
|
||||
bool IsResolved() const { return base_resolved || mips_resolved; }
|
||||
uint32_t GetGuestBaseSize() const {
|
||||
return guest_layout.base.level_data_extent_bytes;
|
||||
}
|
||||
uint32_t GetGuestMipsSize() const {
|
||||
return guest_layout.mips_total_extent_bytes;
|
||||
}
|
||||
};
|
||||
|
||||
struct SRVDescriptorCachePage {
|
||||
static constexpr uint32_t kHeapSize = 65536;
|
||||
ID3D12DescriptorHeap* heap;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE heap_start;
|
||||
};
|
||||
|
||||
struct LoadConstants {
|
||||
// vec4 0.
|
||||
uint32_t is_tiled_3d_endian_scale;
|
||||
// Base offset in bytes, resolution-scaled.
|
||||
uint32_t guest_offset;
|
||||
// For tiled textures - row pitch in blocks, aligned to 32, unscaled.
|
||||
// For linear textures - row pitch in bytes.
|
||||
uint32_t guest_pitch_aligned;
|
||||
// For 3D textures only (ignored otherwise) - aligned to 32, unscaled.
|
||||
uint32_t guest_z_stride_block_rows_aligned;
|
||||
|
||||
// vec4 1.
|
||||
// If this is a packed mip tail, this is aligned to tile dimensions.
|
||||
// Resolution-scaled.
|
||||
uint32_t size_blocks[3];
|
||||
// Base offset in bytes.
|
||||
uint32_t host_offset;
|
||||
|
||||
// vec4 2.
|
||||
uint32_t host_pitch;
|
||||
uint32_t height_texels;
|
||||
};
|
||||
|
||||
struct TextureBinding {
|
||||
TextureKey key;
|
||||
// Destination swizzle merged with guest->host format swizzle.
|
||||
uint32_t host_swizzle;
|
||||
// Packed TextureSign values, 2 bit per each component, with guest-side
|
||||
// destination swizzle from the fetch constant applied to them.
|
||||
uint8_t swizzled_signs;
|
||||
// Unsigned version of the texture (or signed if they have the same data).
|
||||
Texture* texture;
|
||||
// Signed version of the texture if the data in the signed version is
|
||||
// different on the host.
|
||||
Texture* texture_signed;
|
||||
// Descriptor indices of texture and texture_signed returned from
|
||||
// FindOrCreateTextureDescriptor.
|
||||
uint32_t descriptor_index;
|
||||
uint32_t descriptor_index_signed;
|
||||
void Clear() {
|
||||
std::memset(this, 0, sizeof(*this));
|
||||
descriptor_index = descriptor_index_signed = UINT32_MAX;
|
||||
}
|
||||
};
|
||||
|
||||
static uint32_t GetMaxHostTextureWidthHeight(xenos::DataDimension dimension) {
|
||||
switch (dimension) {
|
||||
case xenos::DataDimension::k1D:
|
||||
case xenos::DataDimension::k2DOrStacked:
|
||||
// 1D and 2D are emulated as 2D arrays.
|
||||
return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
||||
case xenos::DataDimension::k3D:
|
||||
return D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
|
||||
case xenos::DataDimension::kCube:
|
||||
return D3D12_REQ_TEXTURECUBE_DIMENSION;
|
||||
default:
|
||||
assert_unhandled_case(dimension);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
static uint32_t GetMaxHostTextureDepthOrArraySize(
|
||||
xenos::DataDimension dimension) {
|
||||
switch (dimension) {
|
||||
case xenos::DataDimension::k1D:
|
||||
case xenos::DataDimension::k2DOrStacked:
|
||||
// 1D and 2D are emulated as 2D arrays.
|
||||
return D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION;
|
||||
case xenos::DataDimension::k3D:
|
||||
return D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
|
||||
case xenos::DataDimension::kCube:
|
||||
return D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION / 6 * 6;
|
||||
default:
|
||||
assert_unhandled_case(dimension);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
class ScaledResolveVirtualBuffer {
|
||||
public:
|
||||
ScaledResolveVirtualBuffer(ID3D12Resource* resource,
|
||||
D3D12_RESOURCE_STATES resource_state)
|
||||
: resource_(resource), resource_state_(resource_state) {}
|
||||
ID3D12Resource* resource() const { return resource_.Get(); }
|
||||
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
|
||||
D3D12_RESOURCE_STATES old_state = resource_state_;
|
||||
if (old_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
uav_barrier_pending_ = false;
|
||||
}
|
||||
resource_state_ = new_state;
|
||||
return old_state;
|
||||
}
|
||||
// After writing through a UAV.
|
||||
void SetUAVBarrierPending() {
|
||||
if (resource_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
uav_barrier_pending_ = true;
|
||||
}
|
||||
}
|
||||
// After an aliasing barrier (which is even stronger than an UAV barrier).
|
||||
void ClearUAVBarrierPending() { uav_barrier_pending_ = false; }
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
|
||||
D3D12_RESOURCE_STATES resource_state_;
|
||||
bool uav_barrier_pending_ = false;
|
||||
};
|
||||
|
||||
// Whether the signed version of the texture has a different representation on
|
||||
// the host than its unsigned version (for example, if it's a fixed-point
|
||||
// texture emulated with a larger host pixel format).
|
||||
static bool IsSignedVersionSeparate(xenos::TextureFormat format) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return host_format.load_mode_snorm != LoadMode::kUnknown &&
|
||||
host_format.load_mode_snorm != host_format.load_mode;
|
||||
}
|
||||
// Whether decompression is needed on the host (Direct3D only allows creation
|
||||
// of block-compressed textures with 4x4-aligned dimensions on PC).
|
||||
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
|
||||
uint32_t height);
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_resource;
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
|
||||
return GetDXGIResourceFormat(key.format, key.GetWidth(), key.GetHeight());
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_unorm;
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
|
||||
return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight());
|
||||
}
|
||||
|
||||
static LoadMode GetLoadMode(TextureKey key);
|
||||
|
||||
// Converts a texture fetch constant to a texture key, normalizing and
|
||||
// validating the values, or creating an invalid key, and also gets the
|
||||
// host swizzle and post-guest-swizzle signedness.
|
||||
static void BindingInfoFromFetchConstant(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
|
||||
uint32_t* host_swizzle_out, uint8_t* swizzled_signs_out);
|
||||
|
||||
static constexpr bool AreDimensionsCompatible(
|
||||
xenos::FetchOpDimension binding_dimension,
|
||||
xenos::DataDimension resource_dimension) {
|
||||
switch (binding_dimension) {
|
||||
case xenos::FetchOpDimension::k1D:
|
||||
case xenos::FetchOpDimension::k2D:
|
||||
return resource_dimension == xenos::DataDimension::k1D ||
|
||||
resource_dimension == xenos::DataDimension::k2DOrStacked;
|
||||
case xenos::FetchOpDimension::k3DOrStacked:
|
||||
return resource_dimension == xenos::DataDimension::k3D;
|
||||
case xenos::FetchOpDimension::kCube:
|
||||
return resource_dimension == xenos::DataDimension::kCube;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void LogTextureKeyAction(TextureKey key, const char* action);
|
||||
static void LogTextureAction(const Texture* texture, const char* action);
|
||||
|
||||
// Returns nullptr if the key is not supported, but also if couldn't create
|
||||
// the texture - if it's nullptr, occasionally a recreation attempt should be
|
||||
// made.
|
||||
Texture* FindOrCreateTexture(TextureKey key);
|
||||
|
||||
// Writes data from the shared memory to the texture. This binds pipelines,
|
||||
// allocates descriptors and copies!
|
||||
bool LoadTextureData(Texture* texture);
|
||||
|
||||
// Returns the index of an existing of a newly created non-shader-visible
|
||||
// cached (for bindful) or a shader-visible global (for bindless) descriptor,
|
||||
// or UINT32_MAX if failed to create.
|
||||
uint32_t FindOrCreateTextureDescriptor(Texture& texture, bool is_signed,
|
||||
uint32_t host_swizzle);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE GetTextureDescriptorCPUHandle(
|
||||
uint32_t descriptor_index) const;
|
||||
|
||||
// For LRU caching - updates the last usage frame and moves the texture to
|
||||
// the end of the usage queue. Must be called any time the texture is
|
||||
// referenced by any command list to make sure it's not destroyed while still
|
||||
// in use.
|
||||
void MarkTextureUsed(Texture* texture);
|
||||
|
||||
// Shared memory callback for texture data invalidation.
|
||||
static void WatchCallbackThunk(void* context, void* data, uint64_t argument,
|
||||
bool invalidated_by_gpu);
|
||||
void WatchCallback(Texture* texture, bool is_mip);
|
||||
|
||||
// Makes all bindings invalid. Also requesting textures after calling this
|
||||
// will cause another attempt to create a texture or to untile it if there was
|
||||
// an error.
|
||||
void ClearBindings();
|
||||
|
||||
size_t GetScaledResolveBufferCount() const {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
// Make sure any range up to 1 GB is accessible through 1 or 2 buffers.
|
||||
// 2x2 scale buffers - just one 2 GB buffer for all 2 GB.
|
||||
// 3x3 scale buffers - 4 buffers:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |___________________|___________________|
|
||||
// |___________________|______________|
|
||||
// Buffer N has an offset of N * 1 GB in the scaled resolve address space.
|
||||
// The logic is:
|
||||
// - 2 GB can be accessed through a [0 GB ... 2 GB) buffer - only need one.
|
||||
// - 2.1 GB needs [0 GB ... 2 GB) and [1 GB ... 2.1 GB) - two buffers.
|
||||
// - 3 GB needs [0 GB ... 2 GB) and [1 GB ... 3 GB) - two buffers.
|
||||
// - 3.1 GB needs [0 GB ... 2 GB), [1 GB ... 3 GB) and [2 GB ... 3.1 GB) -
|
||||
// three buffers.
|
||||
uint64_t address_space_size =
|
||||
uint64_t(SharedMemory::kBufferSize) *
|
||||
(draw_resolution_scale_x_ * draw_resolution_scale_y_);
|
||||
return size_t((address_space_size - 1) >> 30);
|
||||
}
|
||||
// Returns indices of two scaled resolve virtual buffers that the location in
|
||||
// memory may be accessible through. May be the same if it's a location near
|
||||
// the beginning or the end of the address represented only by one buffer.
|
||||
std::array<size_t, 2> GetPossibleScaledResolveBufferIndices(
|
||||
uint64_t address_scaled) const {
|
||||
assert_true(IsDrawResolutionScaled());
|
||||
size_t address_gb = size_t(address_scaled >> 30);
|
||||
size_t max_index = GetScaledResolveBufferCount() - 1;
|
||||
// In different cases for 3x3:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |12________2________|1_________2________|
|
||||
// |1_________2________|1_________12__|
|
||||
return std::array<size_t, 2>{
|
||||
std::min(address_gb, max_index),
|
||||
std::min(std::max(address_gb, size_t(1)) - size_t(1), max_index)};
|
||||
}
|
||||
// Checks if there are any pages that contain scaled resolve data within the
|
||||
// range.
|
||||
bool IsRangeScaledResolved(uint32_t start_unscaled, uint32_t length_unscaled);
|
||||
// Global shared memory invalidation callback for invalidating scaled resolved
|
||||
// texture data.
|
||||
static void ScaledResolveGlobalWatchCallbackThunk(void* context,
|
||||
uint32_t address_first,
|
||||
uint32_t address_last,
|
||||
bool invalidated_by_gpu);
|
||||
void ScaledResolveGlobalWatchCallback(uint32_t address_first,
|
||||
uint32_t address_last,
|
||||
bool invalidated_by_gpu);
|
||||
// The index is also the gigabyte offset of the buffer from the start of the
|
||||
// scaled physical memory address space.
|
||||
size_t GetCurrentScaledResolveBufferIndex() const {
|
||||
return scaled_resolve_1gb_buffer_indices_
|
||||
[scaled_resolve_current_range_start_scaled_ >> 30];
|
||||
}
|
||||
ScaledResolveVirtualBuffer& GetCurrentScaledResolveBuffer() {
|
||||
ScaledResolveVirtualBuffer* scaled_resolve_buffer =
|
||||
scaled_resolve_2gb_buffers_[GetCurrentScaledResolveBufferIndex()];
|
||||
assert_not_null(scaled_resolve_buffer);
|
||||
return *scaled_resolve_buffer;
|
||||
}
|
||||
|
||||
static const HostFormat host_formats_[64];
|
||||
|
||||
static const char* const dimension_names_[4];
|
||||
|
||||
D3D12CommandProcessor& command_processor_;
|
||||
const RegisterFile& register_file_;
|
||||
D3D12SharedMemory& shared_memory_;
|
||||
bool bindless_resources_used_;
|
||||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
ID3D12RootSignature* load_root_signature_ = nullptr;
|
||||
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
|
||||
// Load pipelines for resolution-scaled resolve targets.
|
||||
ID3D12PipelineState* load_pipelines_scaled_[size_t(LoadMode::kCount)] = {};
|
||||
|
||||
std::unordered_map<TextureKey, Texture*, TextureKey::Hasher> textures_;
|
||||
uint64_t textures_total_size_ = 0;
|
||||
Texture* texture_used_first_ = nullptr;
|
||||
Texture* texture_used_last_ = nullptr;
|
||||
uint64_t texture_current_usage_time_;
|
||||
|
||||
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;
|
||||
uint32_t srv_descriptor_cache_allocated_;
|
||||
// Indices of cached descriptors used by deleted textures, for reuse.
|
||||
std::vector<uint32_t> srv_descriptor_cache_free_;
|
||||
|
||||
enum class NullSRVDescriptorIndex {
|
||||
k2DArray,
|
||||
k3D,
|
||||
kCube,
|
||||
|
||||
kCount,
|
||||
};
|
||||
// Contains null SRV descriptors of dimensions from NullSRVDescriptorIndex.
|
||||
// For copying, not shader-visible.
|
||||
ID3D12DescriptorHeap* null_srv_descriptor_heap_ = nullptr;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_descriptor_heap_start_;
|
||||
|
||||
TextureBinding texture_bindings_[32] = {};
|
||||
// Bit vector with bits reset on fetch constant writes to avoid parsing fetch
|
||||
// constants again and again.
|
||||
uint32_t texture_bindings_in_sync_ = 0;
|
||||
|
||||
// Whether a texture has been invalidated (a watch has been triggered), so
|
||||
// need to try to reload textures, disregarding whether fetch constants have
|
||||
// been changed.
|
||||
std::atomic<bool> texture_invalidated_ = false;
|
||||
|
||||
// Unsupported texture formats used during this frame (for research and
|
||||
// testing).
|
||||
enum : uint8_t {
|
||||
kUnsupportedResourceBit = 1,
|
||||
kUnsupportedUnormBit = kUnsupportedResourceBit << 1,
|
||||
kUnsupportedSnormBit = kUnsupportedUnormBit << 1,
|
||||
};
|
||||
uint8_t unsupported_format_features_used_[64];
|
||||
|
||||
uint32_t draw_resolution_scale_x_ = 1;
|
||||
uint32_t draw_resolution_scale_y_ = 1;
|
||||
// The tiled buffer for resolved data with resolution scaling.
|
||||
// Because on Direct3D 12 (at least on Windows 10 2004) typed SRV or UAV
|
||||
// creation fails for offsets above 4 GB, a single tiled 4.5 GB buffer can't
|
||||
// be used for 3x3 resolution scaling.
|
||||
// Instead, "sliding window" buffers allowing to access a single range of up
|
||||
// to 1 GB (or up to 2 GB, depending on the low bits) at any moment are used.
|
||||
// Parts of 4.5 GB address space can be accessed through 2 GB buffers as:
|
||||
// +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5
|
||||
// |___________________|___________________| or
|
||||
// |___________________|______________|
|
||||
// (2 GB is also the amount of scaled physical memory with 2x resolution
|
||||
// scale, and older Intel GPUs, while support tiled resources, only support 31
|
||||
// virtual address bits per resource).
|
||||
// Index is first gigabyte. Only including buffers containing over 1 GB
|
||||
// (because otherwise the data will be fully contained in another).
|
||||
// Size is calculated the same as in GetScaledResolveBufferCount.
|
||||
ScaledResolveVirtualBuffer*
|
||||
scaled_resolve_2gb_buffers_[(uint64_t(SharedMemory::kBufferSize) *
|
||||
(kMaxDrawResolutionScaleAlongAxis *
|
||||
kMaxDrawResolutionScaleAlongAxis) -
|
||||
1) >>
|
||||
30] = {};
|
||||
// Not very big heaps (16 MB) because they are needed pretty sparsely. One
|
||||
// 2x-scaled 1280x720x32bpp texture is slighly bigger than 14 MB.
|
||||
static constexpr uint32_t kScaledResolveHeapSizeLog2 = 24;
|
||||
static constexpr uint32_t kScaledResolveHeapSize =
|
||||
uint32_t(1) << kScaledResolveHeapSizeLog2;
|
||||
static_assert(
|
||||
(kScaledResolveHeapSize % D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) == 0,
|
||||
"Scaled resolve heap size must be a multiple of Direct3D tile size");
|
||||
static_assert(
|
||||
kScaledResolveHeapSizeLog2 <= SharedMemory::kBufferSizeLog2,
|
||||
"Scaled resolve heaps are assumed to be wholly mappable irrespective of "
|
||||
"resolution scale, never truncated, for example, if the scaled resolve "
|
||||
"address space is 4.5 GB, but the heap size is 1 GB");
|
||||
static_assert(
|
||||
kScaledResolveHeapSizeLog2 <= 30,
|
||||
"Scaled resolve heaps are assumed to only be wholly mappable to up to "
|
||||
"two 2 GB buffers");
|
||||
// Resident portions of the tiled buffer.
|
||||
std::vector<ID3D12Heap*> scaled_resolve_heaps_;
|
||||
// Number of currently resident portions of the tiled buffer, for profiling.
|
||||
uint32_t scaled_resolve_heap_count_ = 0;
|
||||
// Global watch for scaled resolve data invalidation.
|
||||
SharedMemory::GlobalWatchHandle scaled_resolve_global_watch_handle_ = nullptr;
|
||||
// Current scaled resolve state.
|
||||
// For aliasing barrier placement, last owning buffer index for each of 1 GB.
|
||||
size_t
|
||||
scaled_resolve_1gb_buffer_indices_[(uint64_t(SharedMemory::kBufferSize) *
|
||||
kMaxDrawResolutionScaleAlongAxis *
|
||||
kMaxDrawResolutionScaleAlongAxis +
|
||||
((uint32_t(1) << 30) - 1)) >>
|
||||
30];
|
||||
// Range used in the last successful MakeScaledResolveRangeCurrent call.
|
||||
uint64_t scaled_resolve_current_range_start_scaled_;
|
||||
uint64_t scaled_resolve_current_range_length_scaled_;
|
||||
|
||||
xe::global_critical_region global_critical_region_;
|
||||
// Bit vector storing whether each 4 KB physical memory page contains scaled
|
||||
// resolve data. uint32_t rather than uint64_t because parts of it can be sent
|
||||
// to shaders.
|
||||
uint32_t* scaled_resolve_pages_ = nullptr;
|
||||
// Second level of the bit vector for faster rejection of non-scaled textures.
|
||||
// >> 12 for 4 KB pages, >> 5 for uint32_t level 1 bits, >> 6 for uint64_t
|
||||
// level 2 bits.
|
||||
uint64_t scaled_resolve_pages_l2_[SharedMemory::kBufferSize >> (12 + 5 + 6)];
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
|
@ -20,6 +20,7 @@
|
|||
#include "xenia/base/memory.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
@ -166,15 +167,17 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
|
|||
return false;
|
||||
}
|
||||
|
||||
void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale_x,
|
||||
uint32_t resolution_scale_y, bool origin_bottom_left,
|
||||
uint32_t x_max, uint32_t y_max, bool allow_reverse_z,
|
||||
void GetHostViewportInfo(const RegisterFile& regs,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
bool origin_bottom_left, uint32_t x_max,
|
||||
uint32_t y_max, bool allow_reverse_z,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
bool convert_z_to_float24, bool full_float24_in_0_to_1,
|
||||
bool pixel_shader_writes_depth,
|
||||
ViewportInfo& viewport_info_out) {
|
||||
assert_not_zero(resolution_scale_x);
|
||||
assert_not_zero(resolution_scale_y);
|
||||
assert_not_zero(draw_resolution_scale_x);
|
||||
assert_not_zero(draw_resolution_scale_y);
|
||||
|
||||
// A vertex position goes the following path:
|
||||
//
|
||||
|
@ -343,8 +346,8 @@ void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale_x,
|
|||
|
||||
// The maximum value is at least the maximum host render target size anyway -
|
||||
// and a guest pixel is always treated as a whole with resolution scaling.
|
||||
uint32_t xy_max_unscaled[] = {x_max / resolution_scale_x,
|
||||
y_max / resolution_scale_y};
|
||||
uint32_t xy_max_unscaled[] = {x_max / draw_resolution_scale_x,
|
||||
y_max / draw_resolution_scale_y};
|
||||
assert_not_zero(xy_max_unscaled[0]);
|
||||
assert_not_zero(xy_max_unscaled[1]);
|
||||
|
||||
|
@ -363,7 +366,8 @@ void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale_x,
|
|||
uint32_t extent_axis_unscaled =
|
||||
std::min(xenos::kTexture2DCubeMaxWidthHeight, xy_max_unscaled[i]);
|
||||
viewport_info_out.xy_extent[i] =
|
||||
extent_axis_unscaled * (i ? resolution_scale_y : resolution_scale_x);
|
||||
extent_axis_unscaled *
|
||||
(i ? draw_resolution_scale_y : draw_resolution_scale_x);
|
||||
float extent_axis_unscaled_float = float(extent_axis_unscaled);
|
||||
float pixels_to_ndc_axis = 2.0f / extent_axis_unscaled_float;
|
||||
ndc_scale[i] = scale_xy[i] * pixels_to_ndc_axis;
|
||||
|
@ -390,7 +394,7 @@ void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale_x,
|
|||
// doing truncation for simplicity - since maxing with 0 is done anyway
|
||||
// (we only return viewports in the positive quarter-plane).
|
||||
uint32_t axis_resolution_scale =
|
||||
i ? resolution_scale_y : resolution_scale_x;
|
||||
i ? draw_resolution_scale_y : draw_resolution_scale_x;
|
||||
float offset_axis = offset_base_xy[i] + offset_add_xy[i];
|
||||
float scale_axis = scale_xy[i];
|
||||
float scale_axis_abs = std::abs(scale_xy[i]);
|
||||
|
@ -645,6 +649,31 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
|||
return normalized_color_mask;
|
||||
}
|
||||
|
||||
void GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
uint32_t draw_resolution_scale_x, uint32_t& divide_scale_out,
|
||||
uint32_t& divide_upper_shift_out) {
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"GetEdramTileWidthDivideScaleAndUpperShift provides values only for draw "
|
||||
"resolution scaling factors of up to 3");
|
||||
switch (draw_resolution_scale_x) {
|
||||
case 1:
|
||||
divide_scale_out = kDivideScale5;
|
||||
divide_upper_shift_out = kDivideUpperShift5 + 4;
|
||||
break;
|
||||
case 2:
|
||||
divide_scale_out = kDivideScale5;
|
||||
divide_upper_shift_out = kDivideUpperShift5 + 5;
|
||||
break;
|
||||
case 3:
|
||||
divide_scale_out = kDivideScale15;
|
||||
divide_upper_shift_out = kDivideUpperShift15 + 4;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(draw_resolution_scale_x);
|
||||
}
|
||||
}
|
||||
|
||||
xenos::CopySampleSelect SanitizeCopySampleSelect(
|
||||
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
|
||||
bool is_depth) {
|
||||
|
@ -1098,7 +1127,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
}
|
||||
|
||||
ResolveCopyShaderIndex ResolveInfo::GetCopyShader(
|
||||
uint32_t resolution_scale_x, uint32_t resolution_scale_y,
|
||||
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
|
||||
ResolveCopyShaderConstants& constants_out, uint32_t& group_count_x_out,
|
||||
uint32_t& group_count_y_out) const {
|
||||
ResolveCopyShaderIndex shader = ResolveCopyShaderIndex::kUnknown;
|
||||
|
@ -1152,10 +1181,10 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader(
|
|||
if (shader != ResolveCopyShaderIndex::kUnknown) {
|
||||
uint32_t width =
|
||||
(address.width_div_8 << xenos::kResolveAlignmentPixelsLog2) *
|
||||
resolution_scale_x;
|
||||
draw_resolution_scale_x;
|
||||
uint32_t height =
|
||||
(address.height_div_8 << xenos::kResolveAlignmentPixelsLog2) *
|
||||
resolution_scale_y;
|
||||
draw_resolution_scale_y;
|
||||
const ResolveCopyShaderInfo& shader_info =
|
||||
resolve_copy_shader_info[size_t(shader)];
|
||||
group_count_x_out = (width + ((1 << shader_info.group_size_x_log2) - 1)) >>
|
||||
|
|
|
@ -196,9 +196,11 @@ struct ViewportInfo {
|
|||
// a viewport, plus values to multiply-add the returned position by, usable on
|
||||
// host graphics APIs such as Direct3D 11+ and Vulkan, also forcing it to the
|
||||
// Direct3D clip space with 0...W Z rather than -W...W.
|
||||
void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale_x,
|
||||
uint32_t resolution_scale_y, bool origin_bottom_left,
|
||||
uint32_t x_max, uint32_t y_max, bool allow_reverse_z,
|
||||
void GetHostViewportInfo(const RegisterFile& regs,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
bool origin_bottom_left, uint32_t x_max,
|
||||
uint32_t y_max, bool allow_reverse_z,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
bool convert_z_to_float24, bool full_float24_in_0_to_1,
|
||||
bool pixel_shader_writes_depth,
|
||||
|
@ -234,26 +236,9 @@ constexpr uint32_t kDivideUpperShift5 = 2;
|
|||
constexpr uint32_t kDivideScale15 = 0x88888889u;
|
||||
constexpr uint32_t kDivideUpperShift15 = 3;
|
||||
|
||||
inline void GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
uint32_t resolution_scale_x, uint32_t& divide_scale,
|
||||
uint32_t& divide_upper_shift) {
|
||||
switch (resolution_scale_x) {
|
||||
case 1:
|
||||
divide_scale = kDivideScale5;
|
||||
divide_upper_shift = kDivideUpperShift5 + 4;
|
||||
break;
|
||||
case 2:
|
||||
divide_scale = kDivideScale5;
|
||||
divide_upper_shift = kDivideUpperShift5 + 5;
|
||||
break;
|
||||
case 3:
|
||||
divide_scale = kDivideScale15;
|
||||
divide_upper_shift = kDivideUpperShift15 + 4;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(resolution_scale_x);
|
||||
}
|
||||
}
|
||||
void GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
uint32_t draw_resolution_scale_x, uint32_t& divide_scale_out,
|
||||
uint32_t& divide_upper_shift_out);
|
||||
|
||||
// Never an identity conversion - can always write conditional move instructions
|
||||
// to shaders that will be no-ops for conversion from guest to host samples.
|
||||
|
@ -474,7 +459,7 @@ struct ResolveInfo {
|
|||
}
|
||||
|
||||
ResolveCopyShaderIndex GetCopyShader(
|
||||
uint32_t resolution_scale_x, uint32_t resolution_scale_y,
|
||||
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
|
||||
ResolveCopyShaderConstants& constants_out, uint32_t& group_count_x_out,
|
||||
uint32_t& group_count_y_out) const;
|
||||
|
||||
|
@ -509,7 +494,8 @@ struct ResolveInfo {
|
|||
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> GetClearShaderGroupCount(
|
||||
uint32_t resolution_scale_x, uint32_t resolution_scale_y) const {
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y) const {
|
||||
// 8 guest MSAA samples per invocation.
|
||||
uint32_t width_samples_div_8 = address.width_div_8;
|
||||
uint32_t height_samples_div_8 = address.height_div_8;
|
||||
|
@ -522,8 +508,8 @@ struct ResolveInfo {
|
|||
width_samples_div_8 <<= 1;
|
||||
}
|
||||
}
|
||||
width_samples_div_8 *= resolution_scale_x;
|
||||
height_samples_div_8 *= resolution_scale_y;
|
||||
width_samples_div_8 *= draw_resolution_scale_x;
|
||||
height_samples_div_8 *= draw_resolution_scale_y;
|
||||
return std::make_pair((width_samples_div_8 + uint32_t(7)) >> 3,
|
||||
height_samples_div_8);
|
||||
}
|
||||
|
|
|
@ -81,10 +81,8 @@ DxbcShaderTranslator::DxbcShaderTranslator(
|
|||
draw_resolution_scale_x_(draw_resolution_scale_x),
|
||||
draw_resolution_scale_y_(draw_resolution_scale_y),
|
||||
emit_source_map_(force_emit_source_map || cvars::dxbc_source_map) {
|
||||
assert_true(draw_resolution_scale_x >= 1);
|
||||
assert_true(draw_resolution_scale_x <= 3);
|
||||
assert_true(draw_resolution_scale_y >= 1);
|
||||
assert_true(draw_resolution_scale_y <= 3);
|
||||
assert_not_zero(draw_resolution_scale_x);
|
||||
assert_not_zero(draw_resolution_scale_y);
|
||||
// Don't allocate again and again for the first shader.
|
||||
shader_code_.reserve(8192);
|
||||
shader_object_.reserve(16384);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -159,6 +160,11 @@ void DxbcShaderTranslator::ExportToMemory() {
|
|||
dxbc::Src::R(control_temp).Select(1 + i));
|
||||
uint32_t axis_resolution_scale =
|
||||
i ? draw_resolution_scale_y_ : draw_resolution_scale_x_;
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"DxbcShaderTranslator memexport draw resolution scaling "
|
||||
"conditional generation supports draw resolution scaling factors "
|
||||
"of only up to 3");
|
||||
switch (axis_resolution_scale) {
|
||||
case 2:
|
||||
// xy & 1 == 1.
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -200,6 +201,10 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
assert_not_zero(tile_or_half_tile_width_divide_upper_shift);
|
||||
--tile_or_half_tile_width_divide_upper_shift;
|
||||
}
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"DxbcShaderTranslator ROV sample address calculation supports Y draw "
|
||||
"resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y_ == 3) {
|
||||
// Multiplication part of the division by 40|80 x 16 x scale (specifically
|
||||
// 40|80 * scale width here, and 48 height, or 16 * 3 height).
|
||||
|
|
|
@ -76,25 +76,6 @@ DEFINE_string(
|
|||
" Any other value:\n"
|
||||
" Choose what is considered the most optimal (currently \"on_copy\").",
|
||||
"GPU");
|
||||
DEFINE_int32(
|
||||
draw_resolution_scale_x, 1,
|
||||
"Integer pixel width scale used for scaling the rendering resolution "
|
||||
"opaquely to the game.\n"
|
||||
"1, 2 and 3 may be supported, but support of anything above 1 depends on "
|
||||
"the device properties, such as whether it supports sparse binding / tiled "
|
||||
"resources, the number of virtual address bits per resource, and other "
|
||||
"factors.\n"
|
||||
"Various effects and parts of game rendering pipelines may work "
|
||||
"incorrectly as pixels become ambiguous from the game's perspective and "
|
||||
"because half-pixel offset (which normally doesn't affect coverage when "
|
||||
"MSAA isn't used) becomes full-pixel.",
|
||||
"GPU");
|
||||
DEFINE_int32(
|
||||
draw_resolution_scale_y, 1,
|
||||
"Integer pixel width scale used for scaling the rendering resolution "
|
||||
"opaquely to the game.\n"
|
||||
"See draw_resolution_scale_x for more information.",
|
||||
"GPU");
|
||||
DEFINE_bool(
|
||||
draw_resolution_scaled_texture_offsets, true,
|
||||
"Apply offsets from texture fetch instructions taking resolution scale "
|
||||
|
@ -416,7 +397,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
uint32_t pitch_pixels_tile_aligned_scaled =
|
||||
pitch_tiles_at_32bpp *
|
||||
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2) *
|
||||
GetResolutionScaleX();
|
||||
draw_resolution_scale_x();
|
||||
uint32_t max_render_target_width = GetMaxRenderTargetWidth();
|
||||
if (pitch_pixels_tile_aligned_scaled > max_render_target_width) {
|
||||
// TODO(Triang3l): If really needed for some game on some device, clamp
|
||||
|
@ -834,14 +815,13 @@ uint32_t RenderTargetCache::GetRenderTargetHeight(
|
|||
!(xenos::kTexture2DCubeMaxWidthHeight % xenos::kEdramTileHeightSamples),
|
||||
"Maximum guest render target height is assumed to always be a multiple "
|
||||
"of an EDRAM tile height");
|
||||
uint32_t resolution_scale_y = GetResolutionScaleY();
|
||||
uint32_t max_height_scaled =
|
||||
std::min(xenos::kTexture2DCubeMaxWidthHeight * resolution_scale_y,
|
||||
std::min(xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_y(),
|
||||
GetMaxRenderTargetHeight());
|
||||
uint32_t msaa_samples_y_log2 =
|
||||
uint32_t(msaa_samples >= xenos::MsaaSamples::k2X);
|
||||
uint32_t tile_height_samples_scaled =
|
||||
xenos::kEdramTileHeightSamples * resolution_scale_y;
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y();
|
||||
tile_rows = std::min(tile_rows, (max_height_scaled << msaa_samples_y_log2) /
|
||||
tile_height_samples_scaled);
|
||||
assert_not_zero(tile_rows);
|
||||
|
@ -868,9 +848,9 @@ void RenderTargetCache::GetHostDepthStoreRectangleInfo(
|
|||
(transfer_rectangle.width_pixels >> 3) - 1;
|
||||
rectangle_constant_out = rectangle_constant;
|
||||
// 1 thread group = 64x8 host samples.
|
||||
uint32_t pixel_size_x = GetResolutionScaleX()
|
||||
uint32_t pixel_size_x = draw_resolution_scale_x()
|
||||
<< uint32_t(msaa_samples >= xenos::MsaaSamples::k4X);
|
||||
uint32_t pixel_size_y = GetResolutionScaleY()
|
||||
uint32_t pixel_size_y = draw_resolution_scale_y()
|
||||
<< uint32_t(msaa_samples >= xenos::MsaaSamples::k2X);
|
||||
group_count_x_out =
|
||||
(transfer_rectangle.width_pixels * pixel_size_x + 63) >> 6;
|
||||
|
@ -1001,7 +981,7 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
|
|||
uint32_t pitch_pixels =
|
||||
pitch_tiles_at_32bpp *
|
||||
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2);
|
||||
uint32_t pitch_pixels_scaled = pitch_pixels * GetResolutionScaleX();
|
||||
uint32_t pitch_pixels_scaled = pitch_pixels * draw_resolution_scale_x();
|
||||
uint32_t max_render_target_width = GetMaxRenderTargetWidth();
|
||||
if (pitch_pixels_scaled > max_render_target_width) {
|
||||
// TODO(Triang3l): If really needed for some game on some device, clamp the
|
||||
|
@ -1147,12 +1127,10 @@ RenderTargetCache::RenderTarget*
|
|||
RenderTargetCache::PrepareFullEdram1280xRenderTargetForSnapshotRestoration(
|
||||
xenos::ColorRenderTargetFormat color_format) {
|
||||
assert_true(GetPath() == Path::kHostRenderTargets);
|
||||
uint32_t resolution_scale_x = GetResolutionScaleX();
|
||||
uint32_t resolution_scale_y = GetResolutionScaleY();
|
||||
constexpr uint32_t kPitchTilesAt32bpp = 16;
|
||||
constexpr uint32_t kWidth =
|
||||
kPitchTilesAt32bpp * xenos::kEdramTileWidthSamples;
|
||||
if (kWidth * resolution_scale_x > GetMaxRenderTargetWidth()) {
|
||||
if (kWidth * draw_resolution_scale_x() > GetMaxRenderTargetWidth()) {
|
||||
return nullptr;
|
||||
}
|
||||
// Same render target height is used for 32bpp and 64bpp to allow mixing them.
|
||||
|
@ -1168,7 +1146,7 @@ RenderTargetCache::PrepareFullEdram1280xRenderTargetForSnapshotRestoration(
|
|||
"Using width of the render target for EDRAM snapshot restoration that is "
|
||||
"expect to fully cover the EDRAM without exceeding the maximum guest "
|
||||
"render target height.");
|
||||
if (kHeight * resolution_scale_y > GetMaxRenderTargetHeight()) {
|
||||
if (kHeight * draw_resolution_scale_y() > GetMaxRenderTargetHeight()) {
|
||||
return nullptr;
|
||||
}
|
||||
RenderTargetKey render_target_key;
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
DECLARE_bool(depth_transfer_not_equal_test);
|
||||
DECLARE_int32(draw_resolution_scale_x);
|
||||
DECLARE_int32(draw_resolution_scale_y);
|
||||
DECLARE_bool(draw_resolution_scaled_texture_offsets);
|
||||
DECLARE_bool(gamma_render_target_as_srgb);
|
||||
DECLARE_bool(native_2x_msaa);
|
||||
|
@ -204,10 +202,10 @@ class RenderTargetCache {
|
|||
// would participate in filtering. However, 1x1 scissor rounded to 1x1, with
|
||||
// the half-pixel offset of vertices, would cause the entire 0.75...2.25 quad
|
||||
// to be discarded.
|
||||
virtual uint32_t GetResolutionScaleX() const = 0;
|
||||
virtual uint32_t GetResolutionScaleY() const = 0;
|
||||
bool IsResolutionScaled() const {
|
||||
return GetResolutionScaleX() > 1 || GetResolutionScaleY() > 1;
|
||||
uint32_t draw_resolution_scale_x() const { return draw_resolution_scale_x_; }
|
||||
uint32_t draw_resolution_scale_y() const { return draw_resolution_scale_y_; }
|
||||
bool IsDrawResolutionScaled() const {
|
||||
return draw_resolution_scale_x() > 1 || draw_resolution_scale_y() > 1;
|
||||
}
|
||||
|
||||
// Virtual (both the common code and the implementation may do something
|
||||
|
@ -232,9 +230,15 @@ class RenderTargetCache {
|
|||
|
||||
protected:
|
||||
RenderTargetCache(const RegisterFile& register_file, const Memory& memory,
|
||||
TraceWriter* trace_writer)
|
||||
TraceWriter* trace_writer, uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y)
|
||||
: register_file_(register_file),
|
||||
draw_extent_estimator_(register_file, memory, trace_writer) {}
|
||||
draw_extent_estimator_(register_file, memory, trace_writer),
|
||||
draw_resolution_scale_x_(draw_resolution_scale_x),
|
||||
draw_resolution_scale_y_(draw_resolution_scale_y) {
|
||||
assert_not_zero(draw_resolution_scale_x);
|
||||
assert_not_zero(draw_resolution_scale_y);
|
||||
}
|
||||
|
||||
const RegisterFile& register_file() const { return register_file_; }
|
||||
|
||||
|
@ -559,8 +563,8 @@ class RenderTargetCache {
|
|||
uint32_t pitch_tiles, bool msaa_2x_supported) const {
|
||||
HostDepthStoreRenderTargetConstant constant;
|
||||
constant.pitch_tiles = pitch_tiles;
|
||||
constant.resolution_scale_x = GetResolutionScaleX();
|
||||
constant.resolution_scale_y = GetResolutionScaleY();
|
||||
constant.resolution_scale_x = draw_resolution_scale_x();
|
||||
constant.resolution_scale_y = draw_resolution_scale_y();
|
||||
constant.msaa_2x_supported = uint32_t(msaa_2x_supported);
|
||||
return constant;
|
||||
}
|
||||
|
@ -612,6 +616,8 @@ class RenderTargetCache {
|
|||
|
||||
private:
|
||||
const RegisterFile& register_file_;
|
||||
uint32_t draw_resolution_scale_x_;
|
||||
uint32_t draw_resolution_scale_y_;
|
||||
|
||||
DrawExtentEstimator draw_extent_estimator_;
|
||||
|
||||
|
|
|
@ -208,10 +208,6 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
|
|||
}
|
||||
|
||||
void SharedMemory::UnwatchMemoryRange(WatchHandle handle) {
|
||||
if (handle == nullptr) {
|
||||
// Could be a zero length range.
|
||||
return;
|
||||
}
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle));
|
||||
}
|
||||
|
@ -228,8 +224,8 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last,
|
|||
|
||||
// Fire global watches.
|
||||
for (const auto global_watch : global_watches_) {
|
||||
global_watch->callback(global_watch->callback_context, address_first,
|
||||
address_last, invalidated_by_gpu);
|
||||
global_watch->callback(global_lock, global_watch->callback_context,
|
||||
address_first, address_last, invalidated_by_gpu);
|
||||
}
|
||||
|
||||
// Fire per-range watches.
|
||||
|
@ -241,8 +237,9 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last,
|
|||
// will be broken.
|
||||
node = node->bucket_node_next;
|
||||
if (page_first <= range->page_last && page_last >= range->page_first) {
|
||||
range->callback(range->callback_context, range->callback_data,
|
||||
range->callback_argument, invalidated_by_gpu);
|
||||
range->callback(global_lock, range->callback_context,
|
||||
range->callback_data, range->callback_argument,
|
||||
invalidated_by_gpu);
|
||||
UnlinkWatchRange(range);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -11,6 +11,7 @@
|
|||
#define XENIA_GPU_SHARED_MEMORY_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
|
@ -32,9 +33,9 @@ class SharedMemory {
|
|||
// Call in the implementation-specific ClearCache.
|
||||
virtual void ClearCache();
|
||||
|
||||
typedef void (*GlobalWatchCallback)(void* context, uint32_t address_first,
|
||||
uint32_t address_last,
|
||||
bool invalidated_by_gpu);
|
||||
typedef void (*GlobalWatchCallback)(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
|
||||
typedef void* GlobalWatchHandle;
|
||||
// Registers a callback invoked when something is invalidated in the GPU
|
||||
// memory copy by the CPU or (if triggered explicitly - such as by a resolve)
|
||||
|
@ -47,8 +48,9 @@ class SharedMemory {
|
|||
GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
|
||||
void* callback_context);
|
||||
void UnregisterGlobalWatch(GlobalWatchHandle handle);
|
||||
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument,
|
||||
bool invalidated_by_gpu);
|
||||
typedef void (*WatchCallback)(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
void* data, uint64_t argument, bool invalidated_by_gpu);
|
||||
typedef void* WatchHandle;
|
||||
// Registers a callback invoked when the specified memory range is invalidated
|
||||
// in the GPU memory copy by the CPU or (if triggered explicitly - such as by
|
||||
|
|
|
@ -0,0 +1,871 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
DEFINE_int32(
|
||||
draw_resolution_scale_x, 1,
|
||||
"Integer pixel width scale used for scaling the rendering resolution "
|
||||
"opaquely to the game.\n"
|
||||
"1, 2 and 3 may be supported, but support of anything above 1 depends on "
|
||||
"the device properties, such as whether it supports sparse binding / tiled "
|
||||
"resources, the number of virtual address bits per resource, and other "
|
||||
"factors.\n"
|
||||
"Various effects and parts of game rendering pipelines may work "
|
||||
"incorrectly as pixels become ambiguous from the game's perspective and "
|
||||
"because half-pixel offset (which normally doesn't affect coverage when "
|
||||
"MSAA isn't used) becomes full-pixel.",
|
||||
"GPU");
|
||||
DEFINE_int32(
|
||||
draw_resolution_scale_y, 1,
|
||||
"Integer pixel width scale used for scaling the rendering resolution "
|
||||
"opaquely to the game.\n"
|
||||
"See draw_resolution_scale_x for more information.",
|
||||
"GPU");
|
||||
DEFINE_uint32(
|
||||
texture_cache_memory_limit_soft, 384,
|
||||
"Maximum host texture memory usage (in megabytes) above which old textures "
|
||||
"will be destroyed.",
|
||||
"GPU");
|
||||
DEFINE_uint32(
|
||||
texture_cache_memory_limit_soft_lifetime, 30,
|
||||
"Seconds a texture should be unused to be considered old enough to be "
|
||||
"deleted if texture memory usage exceeds texture_cache_memory_limit_soft.",
|
||||
"GPU");
|
||||
DEFINE_uint32(
|
||||
texture_cache_memory_limit_hard, 768,
|
||||
"Maximum host texture memory usage (in megabytes) above which textures "
|
||||
"will be destroyed as soon as possible.",
|
||||
"GPU");
|
||||
DEFINE_uint32(
|
||||
texture_cache_memory_limit_render_to_texture, 24,
|
||||
"Part of the host texture memory budget (in megabytes) that will be scaled "
|
||||
"by the current drawing resolution scale.\n"
|
||||
"If texture_cache_memory_limit_soft, for instance, is 384, and this is 24, "
|
||||
"it will be assumed that the game will be using roughly 24 MB of "
|
||||
"render-to-texture (resolve) targets and 384 - 24 = 360 MB of regular "
|
||||
"textures - so with 2x2 resolution scaling, the soft limit will be 360 + "
|
||||
"96 MB, and with 3x3, it will be 360 + 216 MB.",
|
||||
"GPU");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
TextureCache::TextureCache(const RegisterFile& register_file,
|
||||
SharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y)
|
||||
: register_file_(register_file),
|
||||
shared_memory_(shared_memory),
|
||||
draw_resolution_scale_x_(draw_resolution_scale_x),
|
||||
draw_resolution_scale_y_(draw_resolution_scale_y) {
|
||||
assert_true(draw_resolution_scale_x >= 1);
|
||||
assert_true(draw_resolution_scale_x <= kMaxDrawResolutionScaleAlongAxis);
|
||||
assert_true(draw_resolution_scale_y >= 1);
|
||||
assert_true(draw_resolution_scale_y <= kMaxDrawResolutionScaleAlongAxis);
|
||||
|
||||
if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
|
||||
constexpr uint32_t kScaledResolvePageDwordCount =
|
||||
SharedMemory::kBufferSize / 4096 / 32;
|
||||
scaled_resolve_pages_ =
|
||||
std::unique_ptr<uint32_t[]>(new uint32_t[kScaledResolvePageDwordCount]);
|
||||
std::memset(scaled_resolve_pages_.get(), 0,
|
||||
kScaledResolvePageDwordCount * sizeof(uint32_t));
|
||||
std::memset(scaled_resolve_pages_l2_, 0, sizeof(scaled_resolve_pages_l2_));
|
||||
scaled_resolve_global_watch_handle_ = shared_memory.RegisterGlobalWatch(
|
||||
ScaledResolveGlobalWatchCallbackThunk, this);
|
||||
}
|
||||
}
|
||||
|
||||
TextureCache::~TextureCache() {
|
||||
DestroyAllTextures(true);
|
||||
|
||||
if (scaled_resolve_global_watch_handle_) {
|
||||
shared_memory().UnregisterGlobalWatch(scaled_resolve_global_watch_handle_);
|
||||
}
|
||||
}
|
||||
|
||||
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
|
||||
uint32_t& y_out) {
|
||||
uint32_t config_x =
|
||||
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
|
||||
uint32_t config_y =
|
||||
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
|
||||
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
|
||||
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
|
||||
x_out = clamped_x;
|
||||
y_out = clamped_y;
|
||||
return clamped_x == config_x && clamped_y == config_y;
|
||||
}
|
||||
|
||||
void TextureCache::ClearCache() { DestroyAllTextures(); }
|
||||
|
||||
void TextureCache::CompletedSubmissionUpdated(
|
||||
uint64_t completed_submission_index) {
|
||||
// If memory usage is too high, destroy unused textures.
|
||||
uint64_t current_time = xe::Clock::QueryHostUptimeMillis();
|
||||
// texture_cache_memory_limit_render_to_texture is assumed to be included in
|
||||
// texture_cache_memory_limit_soft and texture_cache_memory_limit_hard, at 1x,
|
||||
// so subtracting 1 from the scale.
|
||||
uint32_t limit_scaled_resolve_add_mb =
|
||||
cvars::texture_cache_memory_limit_render_to_texture *
|
||||
(draw_resolution_scale_x() * draw_resolution_scale_y() - 1);
|
||||
uint32_t limit_soft_mb =
|
||||
cvars::texture_cache_memory_limit_soft + limit_scaled_resolve_add_mb;
|
||||
uint32_t limit_hard_mb =
|
||||
cvars::texture_cache_memory_limit_hard + limit_scaled_resolve_add_mb;
|
||||
uint32_t limit_soft_lifetime =
|
||||
cvars::texture_cache_memory_limit_soft_lifetime * 1000;
|
||||
bool destroyed_any = false;
|
||||
while (texture_used_first_ != nullptr) {
|
||||
uint64_t total_host_memory_usage_mb =
|
||||
(textures_total_host_memory_usage_ + ((UINT32_C(1) << 20) - 1)) >> 20;
|
||||
bool limit_hard_exceeded = total_host_memory_usage_mb > limit_hard_mb;
|
||||
if (total_host_memory_usage_mb <= limit_soft_mb && !limit_hard_exceeded) {
|
||||
break;
|
||||
}
|
||||
Texture* texture = texture_used_first_;
|
||||
if (texture->last_usage_submission_index() > completed_submission_index) {
|
||||
break;
|
||||
}
|
||||
if (!limit_hard_exceeded &&
|
||||
(texture->last_usage_time() + limit_soft_lifetime) > current_time) {
|
||||
break;
|
||||
}
|
||||
if (!destroyed_any) {
|
||||
destroyed_any = true;
|
||||
// The texture being destroyed might have been bound in the previous
|
||||
// submissions, and nothing has overwritten the binding yet, so completion
|
||||
// of the submission where the texture was last actually used on the GPU
|
||||
// doesn't imply that it's not bound currently. Reset bindings if
|
||||
// any texture has been destroyed.
|
||||
ResetTextureBindings();
|
||||
}
|
||||
// Remove the texture from the map and destroy it via its unique_ptr.
|
||||
auto found_texture_it = textures_.find(texture->key());
|
||||
assert_true(found_texture_it != textures_.end());
|
||||
if (found_texture_it != textures_.end()) {
|
||||
assert_true(found_texture_it->second.get() == texture);
|
||||
textures_.erase(found_texture_it);
|
||||
// `texture` is invalid now.
|
||||
}
|
||||
}
|
||||
if (destroyed_any) {
|
||||
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::BeginSubmission(uint64_t new_submission_index) {
|
||||
assert_true(new_submission_index > current_submission_index_);
|
||||
current_submission_index_ = new_submission_index;
|
||||
current_submission_time_ = xe::Clock::QueryHostUptimeMillis();
|
||||
}
|
||||
|
||||
void TextureCache::BeginFrame() {
|
||||
// In case there was a failure to create something in the previous frame, make
|
||||
// sure bindings are reset so a new attempt will surely be made if the texture
|
||||
// is requested again.
|
||||
ResetTextureBindings();
|
||||
}
|
||||
|
||||
void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled) {
|
||||
if (length_unscaled == 0) {
|
||||
return;
|
||||
}
|
||||
start_unscaled &= 0x1FFFFFFF;
|
||||
length_unscaled = std::min(length_unscaled, 0x20000000 - start_unscaled);
|
||||
|
||||
if (IsDrawResolutionScaled()) {
|
||||
uint32_t page_first = start_unscaled >> 12;
|
||||
uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
|
||||
uint32_t block_first = page_first >> 5;
|
||||
uint32_t block_last = page_last >> 5;
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
for (uint32_t i = block_first; i <= block_last; ++i) {
|
||||
uint32_t add_bits = UINT32_MAX;
|
||||
if (i == block_first) {
|
||||
add_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
|
||||
}
|
||||
if (i == block_last && (page_last & 31) != 31) {
|
||||
add_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
|
||||
}
|
||||
scaled_resolve_pages_[i] |= add_bits;
|
||||
scaled_resolve_pages_l2_[i >> 6] |= UINT64_C(1) << (i & 63);
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate textures. Toggling individual textures between scaled and
|
||||
// unscaled also relies on invalidation through shared memory.
|
||||
shared_memory().RangeWrittenByGpu(start_unscaled, length_unscaled, true);
|
||||
}
|
||||
|
||||
uint32_t TextureCache::GuestToHostSwizzle(uint32_t guest_swizzle,
|
||||
uint32_t host_format_swizzle) {
|
||||
uint32_t host_swizzle = 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t guest_swizzle_component = (guest_swizzle >> (3 * i)) & 0b111;
|
||||
uint32_t host_swizzle_component;
|
||||
if (guest_swizzle_component >= xenos::XE_GPU_TEXTURE_SWIZZLE_0) {
|
||||
// Get rid of 6 and 7 values (to prevent host GPU errors if the game has
|
||||
// something broken) the simple way - by changing them to 4 (0) and 5 (1).
|
||||
host_swizzle_component = guest_swizzle_component & 0b101;
|
||||
} else {
|
||||
host_swizzle_component =
|
||||
(host_format_swizzle >> (3 * guest_swizzle_component)) & 0b111;
|
||||
}
|
||||
host_swizzle |= host_swizzle_component << (3 * i);
|
||||
}
|
||||
return host_swizzle;
|
||||
}
|
||||
|
||||
void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
||||
const auto& regs = register_file();
|
||||
|
||||
if (texture_became_outdated_.exchange(false, std::memory_order_acquire)) {
|
||||
// A texture has become outdated - make sure whether textures are outdated
|
||||
// is rechecked in this draw and in subsequent ones to reload the new data
|
||||
// if needed.
|
||||
ResetTextureBindings();
|
||||
}
|
||||
|
||||
// Update the texture keys and the textures.
|
||||
uint32_t bindings_changed = 0;
|
||||
uint32_t textures_remaining = used_texture_mask & ~texture_bindings_in_sync_;
|
||||
uint32_t index = 0;
|
||||
while (xe::bit_scan_forward(textures_remaining, &index)) {
|
||||
uint32_t index_bit = UINT32_C(1) << index;
|
||||
textures_remaining &= ~index_bit;
|
||||
TextureBinding& binding = texture_bindings_[index];
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
|
||||
TextureKey old_key = binding.key;
|
||||
uint8_t old_swizzled_signs = binding.swizzled_signs;
|
||||
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);
|
||||
texture_bindings_in_sync_ |= index_bit;
|
||||
if (!binding.key.is_valid) {
|
||||
if (old_key.is_valid) {
|
||||
bindings_changed |= index_bit;
|
||||
}
|
||||
binding.Reset();
|
||||
continue;
|
||||
}
|
||||
uint32_t old_host_swizzle = binding.host_swizzle;
|
||||
binding.host_swizzle =
|
||||
GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(binding.key));
|
||||
|
||||
// Check if need to load the unsigned and the signed versions of the texture
|
||||
// (if the format is emulated with different host bit representations for
|
||||
// signed and unsigned - otherwise only the unsigned one is loaded).
|
||||
bool key_changed = binding.key != old_key;
|
||||
bool any_sign_was_not_signed =
|
||||
texture_util::IsAnySignNotSigned(old_swizzled_signs);
|
||||
bool any_sign_was_signed =
|
||||
texture_util::IsAnySignSigned(old_swizzled_signs);
|
||||
bool any_sign_is_not_signed =
|
||||
texture_util::IsAnySignNotSigned(binding.swizzled_signs);
|
||||
bool any_sign_is_signed =
|
||||
texture_util::IsAnySignSigned(binding.swizzled_signs);
|
||||
if (key_changed || binding.host_swizzle != old_host_swizzle ||
|
||||
any_sign_is_not_signed != any_sign_was_not_signed ||
|
||||
any_sign_is_signed != any_sign_was_signed) {
|
||||
bindings_changed |= index_bit;
|
||||
}
|
||||
bool load_unsigned_data = false, load_signed_data = false;
|
||||
if (IsSignedVersionSeparateForFormat(binding.key)) {
|
||||
// Can reuse previously loaded unsigned/signed versions if the key is the
|
||||
// same and the texture was previously bound as unsigned/signed
|
||||
// respectively (checking the previous values of signedness rather than
|
||||
// binding.texture != nullptr and binding.texture_signed != nullptr also
|
||||
// prevents repeated attempts to load the texture if it has failed to
|
||||
// load).
|
||||
if (any_sign_is_not_signed) {
|
||||
if (key_changed || !any_sign_was_not_signed) {
|
||||
binding.texture = FindOrCreateTexture(binding.key);
|
||||
load_unsigned_data = true;
|
||||
}
|
||||
} else {
|
||||
binding.texture = nullptr;
|
||||
}
|
||||
if (any_sign_is_signed) {
|
||||
if (key_changed || !any_sign_was_signed) {
|
||||
TextureKey signed_key = binding.key;
|
||||
signed_key.signed_separate = 1;
|
||||
binding.texture_signed = FindOrCreateTexture(signed_key);
|
||||
load_signed_data = true;
|
||||
}
|
||||
} else {
|
||||
binding.texture_signed = nullptr;
|
||||
}
|
||||
} else {
|
||||
// Same resource for both unsigned and signed, but descriptor formats may
|
||||
// be different.
|
||||
if (key_changed) {
|
||||
binding.texture = FindOrCreateTexture(binding.key);
|
||||
load_unsigned_data = true;
|
||||
}
|
||||
binding.texture_signed = nullptr;
|
||||
}
|
||||
if (load_unsigned_data && binding.texture != nullptr) {
|
||||
LoadTextureData(*binding.texture);
|
||||
}
|
||||
if (load_signed_data && binding.texture_signed != nullptr) {
|
||||
LoadTextureData(*binding.texture_signed);
|
||||
}
|
||||
}
|
||||
if (bindings_changed) {
|
||||
UpdateTextureBindingsImpl(bindings_changed);
|
||||
}
|
||||
}
|
||||
|
||||
const char* TextureCache::TextureKey::GetLogDimensionName(
|
||||
xenos::DataDimension dimension) {
|
||||
switch (dimension) {
|
||||
case xenos::DataDimension::k1D:
|
||||
return "1D";
|
||||
case xenos::DataDimension::k2DOrStacked:
|
||||
return "2D";
|
||||
case xenos::DataDimension::k3D:
|
||||
return "3D";
|
||||
case xenos::DataDimension::kCube:
|
||||
return "cube";
|
||||
default:
|
||||
assert_unhandled_case(dimension);
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::TextureKey::LogAction(const char* action) const {
|
||||
XELOGGPU(
|
||||
"{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
|
||||
"base at 0x{:08X} (pitch {}), mips at 0x{:08X}",
|
||||
action, tiled ? "tiled" : "linear", scaled_resolve ? "scaled " : "",
|
||||
GetWidth(), GetHeight(), GetDepthOrArraySize(), GetLogDimensionName(),
|
||||
FormatInfo::Get(format)->name, mip_max_level + 1, packed_mips ? "" : "un",
|
||||
mip_max_level != 0 ? "s" : "", base_page << 12, pitch << 5,
|
||||
mip_page << 12);
|
||||
}
|
||||
|
||||
void TextureCache::Texture::LogAction(const char* action) const {
|
||||
XELOGGPU(
|
||||
"{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
|
||||
"base at 0x{:08X} (pitch {}, size 0x{:08X}), mips at 0x{:08X} (size "
|
||||
"0x{:08X})",
|
||||
action, key_.tiled ? "tiled" : "linear",
|
||||
key_.scaled_resolve ? "scaled " : "", key_.GetWidth(), key_.GetHeight(),
|
||||
key_.GetDepthOrArraySize(), key_.GetLogDimensionName(),
|
||||
FormatInfo::Get(key_.format)->name, key_.mip_max_level + 1,
|
||||
key_.packed_mips ? "" : "un", key_.mip_max_level != 0 ? "s" : "",
|
||||
key_.base_page << 12, key_.pitch << 5, GetGuestBaseSize(),
|
||||
key_.mip_page << 12, GetGuestMipsSize());
|
||||
}
|
||||
|
||||
// The texture must be in the recent usage list. Place it in front now because
|
||||
// after creation, the texture will likely be used immediately, and it should
|
||||
// not be destroyed immediately after creation if dropping of old textures is
|
||||
// performed somehow. The list is maintained by the Texture, not the
|
||||
// TextureCache itself (unlike the `textures_` container).
|
||||
TextureCache::Texture::Texture(TextureCache& texture_cache,
|
||||
const TextureKey& key)
|
||||
: texture_cache_(texture_cache),
|
||||
key_(key),
|
||||
guest_layout_(key.GetGuestLayout()),
|
||||
base_resolved_(key.scaled_resolve),
|
||||
mips_resolved_(key.scaled_resolve),
|
||||
last_usage_submission_index_(texture_cache.current_submission_index_),
|
||||
last_usage_time_(texture_cache.current_submission_time_),
|
||||
used_previous_(texture_cache.texture_used_last_),
|
||||
used_next_(nullptr) {
|
||||
if (texture_cache.texture_used_last_) {
|
||||
texture_cache.texture_used_last_->used_next_ = this;
|
||||
} else {
|
||||
texture_cache.texture_used_first_ = this;
|
||||
}
|
||||
texture_cache.texture_used_last_ = this;
|
||||
|
||||
// Never try to upload data that doesn't exist.
|
||||
base_outdated_ = guest_layout().base.level_data_extent_bytes != 0;
|
||||
mips_outdated_ = guest_layout().mips_total_extent_bytes != 0;
|
||||
}
|
||||
|
||||
TextureCache::Texture::~Texture() {
|
||||
if (mips_watch_handle_) {
|
||||
texture_cache().shared_memory().UnwatchMemoryRange(mips_watch_handle_);
|
||||
}
|
||||
if (base_watch_handle_) {
|
||||
texture_cache().shared_memory().UnwatchMemoryRange(base_watch_handle_);
|
||||
}
|
||||
|
||||
if (used_previous_) {
|
||||
used_previous_->used_next_ = used_next_;
|
||||
} else {
|
||||
texture_cache_.texture_used_first_ = used_next_;
|
||||
}
|
||||
if (used_next_) {
|
||||
used_next_->used_previous_ = used_previous_;
|
||||
} else {
|
||||
texture_cache_.texture_used_last_ = used_previous_;
|
||||
}
|
||||
|
||||
texture_cache_.UpdateTexturesTotalHostMemoryUsage(0, host_memory_usage_);
|
||||
}
|
||||
|
||||
void TextureCache::Texture::MakeUpToDateAndWatch(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock) {
|
||||
SharedMemory& shared_memory = texture_cache().shared_memory();
|
||||
if (base_outdated_) {
|
||||
assert_not_zero(GetGuestBaseSize());
|
||||
base_outdated_ = false;
|
||||
base_watch_handle_ = shared_memory.WatchMemoryRange(
|
||||
key().base_page << 12, GetGuestBaseSize(), TextureCache::WatchCallback,
|
||||
this, nullptr, 0);
|
||||
}
|
||||
if (mips_outdated_) {
|
||||
assert_not_zero(GetGuestMipsSize());
|
||||
mips_outdated_ = false;
|
||||
mips_watch_handle_ = shared_memory.WatchMemoryRange(
|
||||
key().mip_page << 12, GetGuestMipsSize(), TextureCache::WatchCallback,
|
||||
this, nullptr, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::Texture::MarkAsUsed() {
|
||||
// This is called very frequently, don't relink unless needed for caching.
|
||||
if (last_usage_submission_index_ ==
|
||||
texture_cache_.current_submission_index_) {
|
||||
return;
|
||||
}
|
||||
last_usage_submission_index_ = texture_cache_.current_submission_index_;
|
||||
last_usage_time_ = texture_cache_.current_submission_time_;
|
||||
if (used_next_ == nullptr) {
|
||||
// Already the most recently used.
|
||||
return;
|
||||
}
|
||||
if (used_previous_ != nullptr) {
|
||||
used_previous_->used_next_ = used_next_;
|
||||
} else {
|
||||
texture_cache_.texture_used_first_ = used_next_;
|
||||
}
|
||||
used_next_->used_previous_ = used_previous_;
|
||||
used_previous_ = texture_cache_.texture_used_last_;
|
||||
used_next_ = nullptr;
|
||||
if (texture_cache_.texture_used_last_ != nullptr) {
|
||||
texture_cache_.texture_used_last_->used_next_ = this;
|
||||
}
|
||||
texture_cache_.texture_used_last_ = this;
|
||||
}
|
||||
|
||||
void TextureCache::Texture::WatchCallback(
|
||||
[[maybe_unused]] const std::unique_lock<std::recursive_mutex>& global_lock,
|
||||
bool is_mip) {
|
||||
if (is_mip) {
|
||||
assert_not_zero(GetGuestMipsSize());
|
||||
mips_outdated_ = true;
|
||||
mips_watch_handle_ = nullptr;
|
||||
} else {
|
||||
assert_not_zero(GetGuestBaseSize());
|
||||
base_outdated_ = true;
|
||||
base_watch_handle_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::WatchCallback(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
void* data, uint64_t argument, bool invalidated_by_gpu) {
|
||||
Texture& texture = *static_cast<Texture*>(context);
|
||||
texture.WatchCallback(global_lock, argument != 0);
|
||||
texture.texture_cache().texture_became_outdated_.store(
|
||||
true, std::memory_order_release);
|
||||
}
|
||||
|
||||
void TextureCache::DestroyAllTextures(bool from_destructor) {
|
||||
ResetTextureBindings(from_destructor);
|
||||
textures_.clear();
|
||||
COUNT_profile_set("gpu/texture_cache/textures", 0);
|
||||
}
|
||||
|
||||
// Returns the cached texture for `key`, creating it if needed. May mark the
// key as scaled-resolve (which changes the host dimensions and thus the
// lookup) before searching the map, so the scaled and unscaled variants of
// the same guest texture are distinct cache entries. Returns nullptr if the
// texture would exceed host size limits or creation fails.
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
  // Check if the texture is a scaled resolve texture.
  if (IsDrawResolutionScaled() && key.tiled &&
      IsScaledResolveSupportedForFormat(key)) {
    texture_util::TextureGuestLayout scaled_resolve_guest_layout =
        key.GetGuestLayout();
    // The texture is considered scaled-resolved if either its base level or
    // its mip range overlaps memory previously written by a scaled resolve
    // (tracked in the scaled_resolve_pages_ bitmaps).
    if ((scaled_resolve_guest_layout.base.level_data_extent_bytes &&
         IsRangeScaledResolved(
             key.base_page << 12,
             scaled_resolve_guest_layout.base.level_data_extent_bytes)) ||
        (scaled_resolve_guest_layout.mips_total_extent_bytes &&
         IsRangeScaledResolved(
             key.mip_page << 12,
             scaled_resolve_guest_layout.mips_total_extent_bytes))) {
      key.scaled_resolve = 1;
    }
  }

  // Scaled-resolve textures are physically larger on the host.
  uint32_t host_width = key.GetWidth();
  uint32_t host_height = key.GetHeight();
  if (key.scaled_resolve) {
    host_width *= draw_resolution_scale_x();
    host_height *= draw_resolution_scale_y();
  }
  // With 3x resolution scaling, a 2D texture may become bigger than the
  // Direct3D 11 limit, and with 2x, a 3D one as well.
  // TODO(Triang3l): Skip mips on Vulkan in this case - the minimum requirement
  // there is 4096, which is below the Xenos maximum texture size of 8192.
  uint32_t max_host_width_height = GetMaxHostTextureWidthHeight(key.dimension);
  uint32_t max_host_depth_or_array_size =
      GetMaxHostTextureDepthOrArraySize(key.dimension);
  if (host_width > max_host_width_height ||
      host_height > max_host_width_height ||
      key.GetDepthOrArraySize() > max_host_depth_or_array_size) {
    return nullptr;
  }

  // Try to find an existing texture.
  // TODO(Triang3l): Reuse a texture with mip_page unchanged, but base_page
  // previously 0, now not 0, to save memory - common case in streaming.
  auto found_texture_it = textures_.find(key);
  if (found_texture_it != textures_.end()) {
    return found_texture_it->second.get();
  }

  // Create the texture and add it to the map.
  Texture* texture;
  {
    // CreateTexture is the backend-specific factory; it may fail (e.g. on
    // host resource creation), in which case the key is not cached.
    std::unique_ptr<Texture> new_texture = CreateTexture(key);
    if (!new_texture) {
      key.LogAction("Failed to create");
      return nullptr;
    }
    assert_true(new_texture->key() == key);
    texture =
        textures_.emplace(key, std::move(new_texture)).first->second.get();
  }
  COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
  texture->LogAction("Created");
  return texture;
}
|
||||
|
||||
// Ensures the host copy of `texture` is up to date, uploading the base level
// and/or mips if their guest memory has been modified since the last load.
// Returns true if the texture is current (including the no-op case), false if
// any shared-memory request, scaled-resolve commit, or the actual load fails.
// On success, marks the guest ranges as watched so future CPU/GPU writes
// invalidate the texture again.
bool TextureCache::LoadTextureData(Texture& texture) {
  // Check what needs to be uploaded.
  bool base_outdated, mips_outdated;
  {
    // The outdated flags are synchronized through the global critical region.
    auto global_lock = global_critical_region_.Acquire();
    base_outdated = texture.base_outdated(global_lock);
    mips_outdated = texture.mips_outdated(global_lock);
  }
  if (!base_outdated && !mips_outdated) {
    return true;
  }

  TextureKey texture_key = texture.key();

  // Request uploading of the texture data to the shared memory.
  // This is also necessary when resolution scaling is used - the texture cache
  // relies on shared memory for invalidation of both unscaled and scaled
  // textures. Plus a texture may be unscaled partially, when only a portion of
  // its pages is invalidated, in this case we'll need the texture from the
  // shared memory to load the unscaled parts.
  // TODO(Triang3l): Load unscaled parts.
  // For non-scaled textures, RequestRange also reports whether any of the
  // requested pages were last written by a resolve.
  bool base_resolved = texture.GetBaseResolved();
  if (base_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.base_page << 12, texture.GetGuestBaseSize(),
            texture_key.scaled_resolve ? nullptr : &base_resolved)) {
      return false;
    }
  }
  bool mips_resolved = texture.GetMipsResolved();
  if (mips_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.mip_page << 12, texture.GetGuestMipsSize(),
            texture_key.scaled_resolve ? nullptr : &mips_resolved)) {
      return false;
    }
  }
  if (texture_key.scaled_resolve) {
    // Make sure all the scaled resolve memory is resident and accessible from
    // the shader, including any possible padding that hasn't yet been touched
    // by an actual resolve, but is still included in the texture size, so the
    // GPU won't be trying to access unmapped memory.
    if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12,
                                            texture.GetGuestBaseSize())) {
      return false;
    }
    if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12,
                                            texture.GetGuestMipsSize())) {
      return false;
    }
  }

  // Actually load the texture data.
  if (!LoadTextureDataFromResidentMemoryImpl(texture, base_outdated,
                                             mips_outdated)) {
    return false;
  }

  // Update the source of the texture (resolve vs. CPU or memexport) for
  // purposes of handling piecewise gamma emulation via sRGB and for resolution
  // scale in sampling offsets.
  if (!texture_key.scaled_resolve) {
    texture.SetBaseResolved(base_resolved);
    texture.SetMipsResolved(mips_resolved);
  }

  // Mark the ranges as uploaded and watch them. This is needed for scaled
  // resolves as well to detect when the CPU wants to reuse the memory for a
  // regular texture or a vertex buffer, and thus the scaled resolve version is
  // not up to date anymore.
  texture.MakeUpToDateAndWatch(global_critical_region_.Acquire());

  texture.LogAction("Loaded");

  return true;
}
|
||||
|
||||
// Translates a guest texture fetch constant into a cache TextureKey and the
// packed per-component signedness. On any validation failure the key is left
// invalid (key_out.is_valid == 0) and the signs stay all-unsigned, so callers
// can use the outputs unconditionally. `swizzled_signs_out` may be null if
// the caller only needs the key.
void TextureCache::BindingInfoFromFetchConstant(
    const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
    uint8_t* swizzled_signs_out) {
  // Reset the key and the signedness.
  key_out.MakeInvalid();
  if (swizzled_signs_out != nullptr) {
    // All four 2-bit components set to kUnsigned.
    *swizzled_signs_out =
        uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
  }

  // Reject fetch constants that aren't valid texture fetch constants.
  switch (fetch.type) {
    case xenos::FetchConstantType::kTexture:
      break;
    case xenos::FetchConstantType::kInvalidTexture:
      // Some games bind constants with the "invalid" type that still work -
      // allow them only behind an explicit flag.
      if (cvars::gpu_allow_invalid_fetch_constants) {
        break;
      }
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "has \"invalid\" type! This is incorrect behavior, but you can try "
          "bypassing this by launching Xenia with "
          "--gpu_allow_invalid_fetch_constants=true.",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
    default:
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "is completely invalid!",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
  }

  uint32_t width_minus_1, height_minus_1, depth_or_array_size_minus_1;
  uint32_t base_page, mip_page, mip_max_level;
  texture_util::GetSubresourcesFromFetchConstant(
      fetch, &width_minus_1, &height_minus_1, &depth_or_array_size_minus_1,
      &base_page, &mip_page, nullptr, &mip_max_level);
  if (base_page == 0 && mip_page == 0) {
    // No texture data at all.
    return;
  }
  // 1D textures must not be tiled or packed-mipped; collect all violations
  // before rejecting so everything gets logged in one pass.
  if (fetch.dimension == xenos::DataDimension::k1D) {
    bool is_invalid_1d = false;
    // TODO(Triang3l): Support long 1D textures.
    if (width_minus_1 >= xenos::kTexture2DCubeMaxWidthHeight) {
      XELOGE(
          "1D texture is too wide ({}) - ignoring! Report the game to Xenia "
          "developers",
          width_minus_1 + 1);
      is_invalid_1d = true;
    }
    assert_false(fetch.tiled);
    if (fetch.tiled) {
      XELOGE(
          "1D texture has tiling enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to Xenia "
          "developers");
      is_invalid_1d = true;
    }
    assert_false(fetch.packed_mips);
    if (fetch.packed_mips) {
      XELOGE(
          "1D texture has packed mips enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to Xenia "
          "developers");
      is_invalid_1d = true;
    }
    if (is_invalid_1d) {
      return;
    }
  }

  xenos::TextureFormat format = GetBaseFormat(fetch.format);

  // Populate the key; is_valid is set last, after everything that can reject
  // the constant.
  key_out.base_page = base_page;
  key_out.mip_page = mip_page;
  key_out.dimension = fetch.dimension;
  key_out.width_minus_1 = width_minus_1;
  key_out.height_minus_1 = height_minus_1;
  key_out.depth_or_array_size_minus_1 = depth_or_array_size_minus_1;
  key_out.pitch = fetch.pitch;
  key_out.mip_max_level = mip_max_level;
  key_out.tiled = fetch.tiled;
  key_out.packed_mips = fetch.packed_mips;
  key_out.format = format;
  key_out.endianness = fetch.endianness;

  key_out.is_valid = 1;

  if (swizzled_signs_out != nullptr) {
    *swizzled_signs_out = texture_util::SwizzleSigns(fetch);
  }
}
|
||||
|
||||
void TextureCache::ResetTextureBindings(bool from_destructor) {
|
||||
uint32_t bindings_reset = 0;
|
||||
for (size_t i = 0; i < texture_bindings_.size(); ++i) {
|
||||
TextureBinding& binding = texture_bindings_[i];
|
||||
if (!binding.key.is_valid) {
|
||||
continue;
|
||||
}
|
||||
binding.Reset();
|
||||
bindings_reset |= UINT32_C(1) << i;
|
||||
}
|
||||
texture_bindings_in_sync_ &= ~bindings_reset;
|
||||
if (!from_destructor && bindings_reset) {
|
||||
UpdateTextureBindingsImpl(bindings_reset);
|
||||
}
|
||||
}
|
||||
|
||||
// Applies a delta to the total host memory used by cached textures (add the
// new size, subtract the old one) and republishes the profiler counter,
// rounded up to whole mebibytes.
void TextureCache::UpdateTexturesTotalHostMemoryUsage(uint64_t add,
                                                      uint64_t subtract) {
  uint64_t new_total = textures_total_host_memory_usage_ + add - subtract;
  textures_total_host_memory_usage_ = new_total;
  // Round up to the next MiB for the profiler display.
  uint64_t mib_round_up = (UINT32_C(1) << 20) - 1;
  COUNT_profile_set("gpu/texture_cache/total_host_memory_usage_mb",
                    uint32_t((new_total + mib_round_up) >> 20));
}
|
||||
|
||||
// Returns whether any 4 KB page in the given guest (unscaled) memory range
// has been written by a resolution-scaled resolve. Uses the two-level page
// bitmap: scaled_resolve_pages_ holds one bit per page (32 pages per uint32
// block), and scaled_resolve_pages_l2_ holds one bit per block for fast
// rejection of empty regions.
bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
                                         uint32_t length_unscaled) {
  if (!IsDrawResolutionScaled()) {
    return false;
  }

  // Clamp the range to the shared memory size; an empty clamped range can't
  // contain resolved pages.
  start_unscaled = std::min(start_unscaled, SharedMemory::kBufferSize);
  length_unscaled =
      std::min(length_unscaled, SharedMemory::kBufferSize - start_unscaled);
  if (!length_unscaled) {
    return false;
  }

  // Two-level check for faster rejection since resolve targets are usually
  // placed in relatively small and localized memory portions (confirmed by
  // testing - pretty much all times the deeper level was entered, the texture
  // was a resolve target).
  // Pages are 4 KB (>> 12), blocks are 32 pages (>> 5), L2 entries are 64
  // blocks (>> 6).
  uint32_t page_first = start_unscaled >> 12;
  uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
  uint32_t block_first = page_first >> 5;
  uint32_t block_last = page_last >> 5;
  uint32_t l2_block_first = block_first >> 6;
  uint32_t l2_block_last = block_last >> 6;
  auto global_lock = global_critical_region_.Acquire();
  for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
    uint64_t l2_block = scaled_resolve_pages_l2_[i];
    // Trim the partial L2 entries at the edges of the range.
    if (i == l2_block_first) {
      l2_block &= ~((UINT64_C(1) << (block_first & 63)) - 1);
    }
    if (i == l2_block_last && (block_last & 63) != 63) {
      l2_block &= (UINT64_C(1) << ((block_last & 63) + 1)) - 1;
    }
    uint32_t block_relative_index;
    // Visit only the blocks that have at least one resolved page.
    while (xe::bit_scan_forward(l2_block, &block_relative_index)) {
      l2_block &= ~(UINT64_C(1) << block_relative_index);
      uint32_t block_index = (i << 6) + block_relative_index;
      // Trim the partial blocks at the edges of the page range.
      uint32_t check_bits = UINT32_MAX;
      if (block_index == block_first) {
        check_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
      }
      if (block_index == block_last && (page_last & 31) != 31) {
        check_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
      }
      if (scaled_resolve_pages_[block_index] & check_bits) {
        return true;
      }
    }
  }
  return false;
}
|
||||
|
||||
void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
|
||||
TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
|
||||
texture_cache->ScaledResolveGlobalWatchCallback(
|
||||
global_lock, address_first, address_last, invalidated_by_gpu);
|
||||
}
|
||||
|
||||
// Handles invalidation of a guest memory range for scaled resolve tracking:
// when the CPU writes to memory previously covered by a scaled resolve, the
// scaled data is no longer authoritative, so the corresponding page bits are
// cleared. GPU-side invalidations (resolves themselves) are ignored.
void TextureCache::ScaledResolveGlobalWatchCallback(
    const std::unique_lock<std::recursive_mutex>& global_lock,
    uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
  assert_true(IsDrawResolutionScaled());
  if (invalidated_by_gpu) {
    // Resolves themselves do exactly the opposite of what this should do.
    return;
  }
  // Mark scaled resolve ranges as non-scaled. Textures themselves will be
  // invalidated by their shared memory watches.
  // Pages are 4 KB (>> 12), blocks are 32 pages (>> 5), L2 entries are 64
  // blocks (>> 6) - same layout as in IsRangeScaledResolved.
  uint32_t resolve_page_first = address_first >> 12;
  uint32_t resolve_page_last = address_last >> 12;
  uint32_t resolve_block_first = resolve_page_first >> 5;
  uint32_t resolve_block_last = resolve_page_last >> 5;
  uint32_t resolve_l2_block_first = resolve_block_first >> 6;
  uint32_t resolve_l2_block_last = resolve_block_last >> 6;
  for (uint32_t i = resolve_l2_block_first; i <= resolve_l2_block_last; ++i) {
    uint64_t resolve_l2_block = scaled_resolve_pages_l2_[i];
    uint32_t resolve_block_relative_index;
    // Visit only blocks that currently have resolved pages.
    while (
        xe::bit_scan_forward(resolve_l2_block, &resolve_block_relative_index)) {
      resolve_l2_block &= ~(UINT64_C(1) << resolve_block_relative_index);
      uint32_t resolve_block_index = (i << 6) + resolve_block_relative_index;
      // Bits to preserve in the block - pages outside the invalidated range
      // at the first and last partially-covered blocks.
      uint32_t resolve_keep_bits = 0;
      if (resolve_block_index == resolve_block_first) {
        resolve_keep_bits |= (UINT32_C(1) << (resolve_page_first & 31)) - 1;
      }
      if (resolve_block_index == resolve_block_last &&
          (resolve_page_last & 31) != 31) {
        resolve_keep_bits |=
            ~((UINT32_C(1) << ((resolve_page_last & 31) + 1)) - 1);
      }
      scaled_resolve_pages_[resolve_block_index] &= resolve_keep_bits;
      // Keep the L2 bitmap consistent: clear the L2 bit when the block has no
      // resolved pages left.
      if (scaled_resolve_pages_[resolve_block_index] == 0) {
        scaled_resolve_pages_l2_[i] &=
            ~(UINT64_C(1) << resolve_block_relative_index);
      }
    }
  }
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,568 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_TEXTURE_CACHE_H_
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/shared_memory.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Manages host copies of guest textures, performing untiling, format and endian
|
||||
// conversion of textures stored in the shared memory, and also handling
|
||||
// invalidation.
|
||||
//
|
||||
// Mipmaps are treated the following way, according to the GPU hang message
|
||||
// found in game executables explaining the valid usage of BaseAddress when
|
||||
// streaming the largest LOD (it says games should not use 0 as the base address
|
||||
// when the largest LOD isn't loaded, but rather, either allocate a valid
|
||||
// address for it or make it the same as mip_address):
|
||||
// - If the texture has a base address, but no mip address, it's not mipmapped -
|
||||
// the host texture has only the largest level too.
|
||||
// - If the texture has different non-zero base address and mip address, a host
|
||||
// texture with mip_max_level+1 mipmaps is created - mip_min_level is ignored
|
||||
// and treated purely as sampler state because there are tfetch instructions
|
||||
// working directly with LOD values - including fetching with an explicit LOD.
|
||||
// However, the max level is not ignored because any mip count can be
|
||||
// specified when creating a texture, and another texture may be placed after
|
||||
// the last one.
|
||||
// - If the texture has a mip address, but the base address is 0 or the same as
|
||||
// the mip address, a mipmapped texture is created, but min/max LOD is clamped
|
||||
// to the lower bound of 1 - the game is expected to do that anyway until the
|
||||
// largest LOD is loaded.
|
||||
// TODO(Triang3l): Attach the largest LOD to existing textures with a valid
|
||||
// mip_address but no base ever used yet (no base_address) to save memory
|
||||
// because textures are streamed this way anyway.
|
||||
class TextureCache {
|
||||
public:
|
||||
// Hard limit, originating from the half-pixel offset (two-pixel offset is too
|
||||
// much, the resolve shaders, being generic for different scales, only
|
||||
// duplicate the second pixel into the first, not the third), and also due to
|
||||
// the bit counts used for passing the scale to shaders.
|
||||
static constexpr uint32_t kMaxDrawResolutionScaleAlongAxis = 3;
|
||||
|
||||
TextureCache(const TextureCache& texture_cache) = delete;
|
||||
TextureCache& operator=(const TextureCache& texture_cache) = delete;
|
||||
virtual ~TextureCache();
|
||||
|
||||
// Returns whether the actual scale is not smaller than the requested one.
|
||||
static bool GetConfigDrawResolutionScale(uint32_t& x_out, uint32_t& y_out);
|
||||
uint32_t draw_resolution_scale_x() const { return draw_resolution_scale_x_; }
|
||||
uint32_t draw_resolution_scale_y() const { return draw_resolution_scale_y_; }
|
||||
bool IsDrawResolutionScaled() const {
|
||||
return draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1;
|
||||
}
|
||||
|
||||
virtual void ClearCache();
|
||||
|
||||
virtual void CompletedSubmissionUpdated(uint64_t completed_submission_index);
|
||||
virtual void BeginSubmission(uint64_t new_submission_index);
|
||||
virtual void BeginFrame();
|
||||
|
||||
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
|
||||
// Ensures the memory backing the range in the scaled resolve address space is
|
||||
// allocated and returns whether it is.
|
||||
virtual bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled,
|
||||
uint32_t length_unscaled) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static uint32_t GuestToHostSwizzle(uint32_t guest_swizzle,
|
||||
uint32_t host_format_swizzle);
|
||||
|
||||
void TextureFetchConstantWritten(uint32_t index) {
|
||||
texture_bindings_in_sync_ &= ~(UINT32_C(1) << index);
|
||||
}
|
||||
|
||||
virtual void RequestTextures(uint32_t used_texture_mask);
|
||||
|
||||
// "ActiveTexture" means as of the latest RequestTextures call.
|
||||
|
||||
// Returns the post-swizzle signedness of a currently bound texture (must be
|
||||
// called after RequestTextures).
|
||||
uint8_t GetActiveTextureSwizzledSigns(uint32_t fetch_constant_index) const {
|
||||
const TextureBinding* binding =
|
||||
GetValidTextureBinding(fetch_constant_index);
|
||||
return binding ? binding->swizzled_signs : kSwizzledSignsUnsigned;
|
||||
}
|
||||
bool IsActiveTextureResolved(uint32_t fetch_constant_index) const {
|
||||
const TextureBinding* binding =
|
||||
GetValidTextureBinding(fetch_constant_index);
|
||||
if (!binding) {
|
||||
return false;
|
||||
}
|
||||
return (binding->texture && binding->texture->IsResolved()) ||
|
||||
(binding->texture_signed && binding->texture_signed->IsResolved());
|
||||
}
|
||||
|
||||
protected:
|
||||
// Bit-packed identity of a cached texture - everything that distinguishes one
// host texture from another. Used both as the lookup key in textures_ (via
// the memcmp-based operator== and the XXHasher) and as the source of the
// guest memory layout. Copying, comparison and invalidation all go through
// memcpy/memcmp/memset so the padding bits stay deterministic for hashing.
struct TextureKey {
  // Dimensions minus 1 are stored similarly to how they're stored in fetch
  // constants so fewer bits can be used, while the maximum size (8192 for 2D)
  // can still be encoded (a 8192x sky texture is used in 4D530910).

  // Physical 4 KB page with the base mip level, disregarding A/C/E address
  // range prefix.
  uint32_t base_page : 17;             // 17 total
  xenos::DataDimension dimension : 2;  // 19
  uint32_t width_minus_1 : 13;         // 32

  uint32_t height_minus_1 : 13;  // 45
  uint32_t tiled : 1;            // 46
  uint32_t packed_mips : 1;      // 47
  // Physical 4 KB page with mip 1 and smaller.
  uint32_t mip_page : 17;  // 64

  // (Layers for stacked and 3D, 6 for cube, 1 for other dimensions) - 1.
  uint32_t depth_or_array_size_minus_1 : 10;  // 74
  uint32_t pitch : 9;                         // 83
  uint32_t mip_max_level : 4;                 // 87
  xenos::TextureFormat format : 6;            // 93
  xenos::Endian endianness : 2;               // 95
  // Whether this texture is signed and has a different host representation
  // than an unsigned view of the same guest texture.
  uint32_t signed_separate : 1;  // 96

  // Whether this texture is a resolution-scaled resolve target.
  uint32_t scaled_resolve : 1;  // 97
  // Least important in ==, so placed last.
  uint32_t is_valid : 1;  // 98

  TextureKey() { MakeInvalid(); }
  // memcpy rather than memberwise copy so the padding is carried over too,
  // keeping hashes of copies identical.
  TextureKey(const TextureKey& key) {
    std::memcpy(this, &key, sizeof(*this));
  }
  TextureKey& operator=(const TextureKey& key) {
    std::memcpy(this, &key, sizeof(*this));
    return *this;
  }
  void MakeInvalid() {
    // Zero everything, including the padding, for a stable hash.
    std::memset(this, 0, sizeof(*this));
  }

  using Hasher = xe::hash::XXHasher<TextureKey>;
  bool operator==(const TextureKey& key) const {
    return !std::memcmp(this, &key, sizeof(*this));
  }
  bool operator!=(const TextureKey& key) const { return !(*this == key); }

  // Stored-minus-one dimensions converted back to actual sizes.
  uint32_t GetWidth() const { return width_minus_1 + 1; }
  uint32_t GetHeight() const { return height_minus_1 + 1; }
  uint32_t GetDepthOrArraySize() const {
    return depth_or_array_size_minus_1 + 1;
  }

  // Computes the guest memory layout (extents, strides) for this key.
  texture_util::TextureGuestLayout GetGuestLayout() const {
    return texture_util::GetGuestTextureLayout(
        dimension, pitch, GetWidth(), GetHeight(), GetDepthOrArraySize(),
        tiled, format, packed_mips, base_page != 0, mip_max_level);
  }

  static const char* GetLogDimensionName(xenos::DataDimension dimension);
  const char* GetLogDimensionName() const {
    return GetLogDimensionName(dimension);
  }
  void LogAction(const char* action) const;
};
|
||||
|
||||
// Base class for a cached host texture. Owns the key and guest layout,
// tracks host memory usage, LRU usage ordering, whether the data came from a
// resolve, and the outdated/watch state synchronized through the global
// critical region. Backends subclass this with the actual host resource
// (hence the protected constructor and virtual destructor); copying is
// disabled because the cache holds each texture through a unique_ptr.
class Texture {
 public:
  Texture(const Texture& texture) = delete;
  Texture& operator=(const Texture& texture) = delete;
  virtual ~Texture();

  // The cache that owns this texture.
  TextureCache& texture_cache() const { return texture_cache_; }

  const TextureKey& key() const { return key_; }

  // Guest memory layout computed once from the key.
  const texture_util::TextureGuestLayout& guest_layout() const {
    return guest_layout_;
  }
  // Guest size in bytes of the base level data.
  uint32_t GetGuestBaseSize() const {
    return guest_layout().base.level_data_extent_bytes;
  }
  // Guest size in bytes of all the mips.
  uint32_t GetGuestMipsSize() const {
    return guest_layout().mips_total_extent_bytes;
  }

  uint64_t GetHostMemoryUsage() const { return host_memory_usage_; }

  uint64_t last_usage_submission_index() const {
    return last_usage_submission_index_;
  }
  uint64_t last_usage_time() const { return last_usage_time_; }

  // Resolved flags must not be cleared on scaled-resolve textures (asserted
  // in the setters) - those exist only because of a resolve.
  bool GetBaseResolved() const { return base_resolved_; }
  void SetBaseResolved(bool base_resolved) {
    assert_false(!base_resolved && key().scaled_resolve);
    base_resolved_ = base_resolved;
  }
  bool GetMipsResolved() const { return mips_resolved_; }
  void SetMipsResolved(bool mips_resolved) {
    assert_false(!mips_resolved && key().scaled_resolve);
    mips_resolved_ = mips_resolved;
  }
  bool IsResolved() const { return base_resolved_ || mips_resolved_; }

  // The global_lock parameters document (and enforce at call sites) that the
  // outdated state may only be read or changed while the global critical
  // region is held.
  bool base_outdated(
      const std::unique_lock<std::recursive_mutex>& global_lock) const {
    return base_outdated_;
  }
  bool mips_outdated(
      const std::unique_lock<std::recursive_mutex>& global_lock) const {
    return mips_outdated_;
  }
  void MakeUpToDateAndWatch(
      const std::unique_lock<std::recursive_mutex>& global_lock);

  void WatchCallback(
      const std::unique_lock<std::recursive_mutex>& global_lock, bool is_mip);

  // For LRU caching - updates the last usage frame and moves the texture to
  // the end of the usage queue. Must be called any time the texture is
  // referenced by any GPU work in the implementation to make sure it's not
  // destroyed while still in use.
  void MarkAsUsed();

  void LogAction(const char* action) const;

 protected:
  Texture(TextureCache& texture_cache, const TextureKey& key);

  // Subclasses report their host resource size here so the cache-wide total
  // stays accurate.
  void SetHostMemoryUsage(uint64_t new_host_memory_usage) {
    texture_cache_.UpdateTexturesTotalHostMemoryUsage(new_host_memory_usage,
                                                      host_memory_usage_);
    host_memory_usage_ = new_host_memory_usage;
  }

 private:
  TextureCache& texture_cache_;

  TextureKey key_;

  texture_util::TextureGuestLayout guest_layout_;

  uint64_t host_memory_usage_ = 0;

  // LRU bookkeeping - last usage stamps and intrusive usage-list links.
  uint64_t last_usage_submission_index_;
  uint64_t last_usage_time_;
  Texture* used_previous_;
  Texture* used_next_;

  // Whether the most up-to-date base / mips contain pages with data from a
  // resolve operation (rather than from the CPU or memexport), primarily for
  // choosing between piecewise linear gamma and sRGB when the former is
  // emulated with the latter.
  bool base_resolved_;
  bool mips_resolved_;

  // These are to be accessed within the global critical region to synchronize
  // with shared memory.
  // Whether the recent base level data needs reloading from the memory.
  bool base_outdated_ = false;
  // Whether the recent mip data needs reloading from the memory.
  bool mips_outdated_ = false;
  // Watch handles for the memory ranges.
  SharedMemory::WatchHandle base_watch_handle_ = nullptr;
  SharedMemory::WatchHandle mips_watch_handle_ = nullptr;
};
|
||||
|
||||
// Rules of data access in load shaders:
|
||||
// - Source reading (from the shared memory or the scaled resolve buffer):
|
||||
// - Guest data may be stored in a sparsely-allocated buffer, or, in
|
||||
// Direct3D 12 terms, a tiled buffer. This means that some regions of the
|
||||
// buffer may not be mapped. On tiled resources tier 1 hardware, accessing
|
||||
// unmapped tiles results in undefined behavior, including a GPU page
|
||||
// fault and device removal. So, shaders must not try to access
|
||||
// potentially unmapped regions (that are outside the texture memory
|
||||
// extents calculated on the CPU, taking into account that Xenia can't
|
||||
// overestimate texture sizes freely since it must not try to upload
|
||||
// unallocated pages on the CPU).
|
||||
// - Buffer tiles have 64 KB size on Direct3D 12. Vulkan has its own
|
||||
// alignment requirements for sparse binding. But overall, we're
|
||||
// allocating pretty large regions.
|
||||
// - Resolution scaling disabled:
|
||||
// - Shared memory allocates regions of power of two sizes that map
|
||||
// directly to the same portions of the 512 MB of the console's
|
||||
// physical memory. So, a 64 KB-aligned host buffer region is also 64
|
||||
// KB-aligned in the guest address space.
|
||||
// - Tiled textures: 32x32x4-block tiles are always resident each as a
|
||||
// whole. If the width is bigger than the pitch, the overflowing 32x32x4
|
||||
// tiles are also loaded as entire tiles. We do not have separate
|
||||
// shaders for 2D and 3D. So, for tiled textures, it's safe to consider
|
||||
// that if any location within a 32x32-aligned portion is within the
|
||||
// texture bounds, the entire 32x32 portion also can be read.
|
||||
// - Linear textures: Pitch is aligned to 256 bytes. Row count, however,
|
||||
// is not aligned to anything (unless the mip tail is being loaded). The
|
||||
// overflowing last row in case `width > pitch`, however, is made
|
||||
// resident up to the last texel in it. But row start alignment is 256,
|
||||
// which is a power of two, and is smaller than the Direct3D 12 tile
|
||||
// size of 64 KB. So, if any block within a 256-aligned region is within
|
||||
// the texture bounds, without resolution scaling, reading from any
|
||||
// location in that 256-aligned region is safe.
|
||||
// - Since we use the same shaders for tiled and linear textures (as well
|
||||
// as 1D textures), this means that without resolution scaling, it's
|
||||
// safe to access a min(256 bytes, 32 blocks)-aligned portion along X,
|
||||
// but only within the same row of blocks, with bounds checking only for
|
||||
// such portion as a whole, but without additional bounds checking
|
||||
// inside of it.
|
||||
// - Therefore, it's recommended that shaders read power-of-two amounts of
|
||||
// blocks (so there will naturally be some alignment to some power of
|
||||
// two), and this way, each thread may read at most 16 16bpb blocks or
|
||||
// at most 32 8bpb or smaller blocks with in a single `if (x < width)`
|
||||
// for the whole aligned range of the same length.
|
||||
// - Resolution scaling enabled:
|
||||
// - For simplicity, unlike in the shared memory, buffer tile boundaries
|
||||
// are not aligned to powers of 2 the same way as guest addresses are.
|
||||
// While for 2x2 resolution scaling it still happens to be the case
|
||||
// because `host scaling unit address = guest scaling unit address << 2`
|
||||
// (similarly for 2x1 and 1x2), for 3x or x3, it's not - a 64 KB host
|
||||
// tile would represent 7281.777 guest bytes with 3x3 (disregarding that
|
||||
// sequences of texels that are adjacent in memory alongside the
|
||||
// horizontal axis, not individual bytes, are scaled, but even in that
|
||||
// case it's not scaling by 2^n still).
|
||||
// - The above would affect the `width > pitch` case for linear textures,
|
||||
// requiring overestimating the width in calculation of the range of the
|
||||
// tiles to map, while not doing this overestimation on the guest memory
|
||||
// extent calculation side (otherwise it may result in attempting to
|
||||
// upload unallocated memory on the CPU). For example, let's take look
|
||||
// at an extreme case of a 369x28 k_8 texture with a pitch of 256 bytes.
|
||||
// The last row, in guest memory, would be loaded from the [7168, 7281)
|
||||
// range, or, with 3x3 resolution scaling, from bytes [64512, 65529).
|
||||
// However, if we try to unconditionally load 2 pixels, like the texture
|
||||
// is 370x28, we will be accessing the bytes [64512, 65538). But bytes
|
||||
// 65536 and 65537 will be in another 64 KB tile, which may be not
|
||||
// mapped yet. However, none of this is an issue for one simple reason -
|
||||
// resolving is only possible to tiled textures, so linear textures will
|
||||
// never be resolution-scaled.
|
||||
// - Tiled textures have potentially referenced guest 32x32-block tiles
|
||||
// loaded in their entirety. So, just like for unscaled textures, if any
|
||||
// block within a tile is available, the entire tile is as well.
|
||||
// - Destination writing (to the linear buffer):
|
||||
// - host_x_blocks_per_thread specifies how many pixels can be written
|
||||
// without bounds checking within increments of that amount - the pitch of
|
||||
// the destination buffer is manually overaligned if needed.
|
||||
|
||||
// In textures, resolution scaling is done for 8-byte portions of memory for
|
||||
// 8bpp textures, and for 16-byte portions for textures of higher bit depths
|
||||
// (these are the sizes of regions where contiguous texels in memory are also
|
||||
// contiguous in the texture along the horizontal axis, so 64-bit and 128-bit
|
||||
// loads / stores, for 8bpp and 16bpp+ respectively, can be used for untiling
|
||||
// regardless of the resolution scale).
|
||||
|
||||
struct LoadConstants {
|
||||
uint32_t is_tiled_3d_endian_scale;
|
||||
// Base offset in bytes, resolution-scaled.
|
||||
uint32_t guest_offset;
|
||||
// For tiled textures - row pitch in blocks, aligned to 32, unscaled.
|
||||
// For linear textures - row pitch in bytes.
|
||||
uint32_t guest_pitch_aligned;
|
||||
// For 3D textures only (ignored otherwise) - aligned to 32, unscaled.
|
||||
uint32_t guest_z_stride_block_rows_aligned;
|
||||
|
||||
// - std140 vector boundary -
|
||||
|
||||
// If this is a packed mip tail, this is aligned to tile dimensions.
|
||||
// Resolution-scaled.
|
||||
uint32_t size_blocks[3];
|
||||
// Base offset in bytes.
|
||||
uint32_t host_offset;
|
||||
|
||||
// - std140 vector boundary -
|
||||
|
||||
uint32_t host_pitch;
|
||||
uint32_t height_texels;
|
||||
};
|
||||
|
||||
static constexpr uint8_t kSwizzledSignsUnsigned =
|
||||
uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
|
||||
|
||||
struct TextureBinding {
|
||||
TextureKey key;
|
||||
// Destination swizzle merged with guest to host format swizzle.
|
||||
uint32_t host_swizzle;
|
||||
// Packed TextureSign values, 2 bit per each component, with guest-side
|
||||
// destination swizzle from the fetch constant applied to them.
|
||||
uint8_t swizzled_signs;
|
||||
// Unsigned version of the texture (or signed if they have the same data).
|
||||
Texture* texture;
|
||||
// Signed version of the texture if the data in the signed version is
|
||||
// different on the host.
|
||||
Texture* texture_signed;
|
||||
|
||||
TextureBinding() { Reset(); }
|
||||
|
||||
void Reset() {
|
||||
std::memset(this, 0, sizeof(*this));
|
||||
host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_0000;
|
||||
swizzled_signs = kSwizzledSignsUnsigned;
|
||||
}
|
||||
};
|
||||
|
||||
TextureCache(const RegisterFile& register_file, SharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y);
|
||||
|
||||
const RegisterFile& register_file() const { return register_file_; }
|
||||
SharedMemory& shared_memory() const { return shared_memory_; }
|
||||
|
||||
// May be called for purposes like clearing the cache, as well as in the
|
||||
// destructor of the implementation if textures, for instance, have references
|
||||
// to the implementation that are used in their destructor, and will become
|
||||
// invalid if the implementation is destroyed before the texture.
|
||||
void DestroyAllTextures(bool from_destructor = false);
|
||||
|
||||
// Whether the signed version of the texture has a different representation on
|
||||
// the host than its unsigned version (for example, if it's a fixed-point
|
||||
// texture emulated with a larger host pixel format).
|
||||
virtual bool IsSignedVersionSeparateForFormat(TextureKey key) const {
|
||||
return false;
|
||||
}
|
||||
// Parameters like whether the texture is tiled and its dimensions are checked
|
||||
// externally, the implementation should take only format-related parameters
|
||||
// such as the format itself and the signedness into account.
|
||||
virtual bool IsScaledResolveSupportedForFormat(TextureKey key) const {
|
||||
return false;
|
||||
}
|
||||
// For formats with less than 4 components, implementations normally should
|
||||
// replicate the last component into the non-existent ones, similar to what is
|
||||
// done for unused components of operands in shaders by Microsoft's Xbox 360
|
||||
// shader compiler (.xxxx, .xyyy, .xyzz, .xyzw).
|
||||
// For DXT3A and DXT5A, RRRR swizzle is specified in:
|
||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||
// 4D5307E6 also expects replicated components in k_8 sprites.
|
||||
// DXN is read as RG in 4D5307E6, but as RA in 415607E6.
|
||||
// TODO(Triang3l): Find out the correct contents of unused texture components.
|
||||
virtual uint32_t GetHostFormatSwizzle(TextureKey key) const = 0;
|
||||
|
||||
virtual uint32_t GetMaxHostTextureWidthHeight(
|
||||
xenos::DataDimension dimension) const = 0;
|
||||
virtual uint32_t GetMaxHostTextureDepthOrArraySize(
|
||||
xenos::DataDimension dimension) const = 0;
|
||||
|
||||
// The texture must be created exactly with this key (if the implementation
|
||||
// supports the texture with this key, otherwise, or in case of a runtime
|
||||
// failure, it should return nullptr), modifying it is not allowed.
|
||||
virtual std::unique_ptr<Texture> CreateTexture(TextureKey key) = 0;
|
||||
|
||||
// Returns nullptr not only if the key is not supported, but also if couldn't
|
||||
// create the texture - if it's nullptr, occasionally a recreation attempt
|
||||
// should be made.
|
||||
Texture* FindOrCreateTexture(TextureKey key);
|
||||
|
||||
bool LoadTextureData(Texture& texture);
|
||||
// Writes the texture data (for base, mips or both - but not neither) from the
|
||||
// shared memory or the scaled resolve memory. The shared memory management is
|
||||
// done outside this function, the implementation just needs to load the data
|
||||
// into the texture object.
|
||||
virtual bool LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||
bool load_base,
|
||||
bool load_mips) = 0;
|
||||
|
||||
// Converts a texture fetch constant to a texture key, normalizing and
|
||||
// validating the values, or creating an invalid key, and also gets the
|
||||
// post-guest-swizzle signedness.
|
||||
static void BindingInfoFromFetchConstant(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
|
||||
uint8_t* swizzled_signs_out);
|
||||
|
||||
// Makes all texture bindings invalid. Also requesting textures after calling
|
||||
// this will cause another attempt to create a texture or to untile it if
|
||||
// there was an error.
|
||||
void ResetTextureBindings(bool from_destructor = false);
|
||||
|
||||
const TextureBinding* GetValidTextureBinding(
|
||||
uint32_t fetch_constant_index) const {
|
||||
const TextureBinding& binding = texture_bindings_[fetch_constant_index];
|
||||
return binding.key.is_valid ? &binding : nullptr;
|
||||
}
|
||||
// Called when something in a texture binding is changed for the
|
||||
// implementation to update the internal dependencies of the binding.
|
||||
virtual void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) {}
|
||||
|
||||
private:
|
||||
void UpdateTexturesTotalHostMemoryUsage(uint64_t add, uint64_t subtract);
|
||||
|
||||
// Shared memory callback for texture data invalidation.
|
||||
static void WatchCallback(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
void* data, uint64_t argument, bool invalidated_by_gpu);
|
||||
|
||||
// Checks if there are any pages that contain scaled resolve data within the
|
||||
// range.
|
||||
bool IsRangeScaledResolved(uint32_t start_unscaled, uint32_t length_unscaled);
|
||||
// Global shared memory invalidation callback for invalidating scaled resolved
|
||||
// texture data.
|
||||
static void ScaledResolveGlobalWatchCallbackThunk(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
|
||||
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
|
||||
void ScaledResolveGlobalWatchCallback(
|
||||
const std::unique_lock<std::recursive_mutex>& global_lock,
|
||||
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
|
||||
|
||||
const RegisterFile& register_file_;
|
||||
SharedMemory& shared_memory_;
|
||||
uint32_t draw_resolution_scale_x_;
|
||||
uint32_t draw_resolution_scale_y_;
|
||||
|
||||
xe::global_critical_region global_critical_region_;
|
||||
// Bit vector storing whether each 4 KB physical memory page contains scaled
|
||||
// resolve data. uint32_t rather than uint64_t because parts of it can be sent
|
||||
// to shaders.
|
||||
std::unique_ptr<uint32_t[]> scaled_resolve_pages_;
|
||||
// Second level of the bit vector for faster rejection of non-scaled textures.
|
||||
// >> 12 for 4 KB pages, >> 5 for uint32_t level 1 bits, >> 6 for uint64_t
|
||||
// level 2 bits.
|
||||
uint64_t scaled_resolve_pages_l2_[SharedMemory::kBufferSize >> (12 + 5 + 6)];
|
||||
|
||||
// Global watch for scaled resolve data invalidation.
|
||||
SharedMemory::GlobalWatchHandle scaled_resolve_global_watch_handle_ = nullptr;
|
||||
|
||||
uint64_t current_submission_index_ = 0;
|
||||
uint64_t current_submission_time_ = 0;
|
||||
|
||||
std::unordered_map<TextureKey, std::unique_ptr<Texture>, TextureKey::Hasher>
|
||||
textures_;
|
||||
|
||||
uint64_t textures_total_host_memory_usage_ = 0;
|
||||
|
||||
Texture* texture_used_first_ = nullptr;
|
||||
Texture* texture_used_last_ = nullptr;
|
||||
|
||||
// Whether a texture has become outdated (a memory watch has been triggered),
|
||||
// so need to recheck if textures aren't outdated, disregarding whether fetch
|
||||
// constants have been changed.
|
||||
std::atomic<bool> texture_became_outdated_{false};
|
||||
|
||||
std::array<TextureBinding, xenos::kTextureFetchConstantCount>
|
||||
texture_bindings_;
|
||||
// Bit vector with bits reset on fetch constant writes to avoid parsing fetch
|
||||
// constants again and again.
|
||||
uint32_t texture_bindings_in_sync_ = 0;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_TEXTURE_CACHE_H_
|
|
@ -533,6 +533,9 @@ uint8_t SwizzleSigns(const xenos::xe_gpu_texture_fetch_t& fetch) {
|
|||
// If only constant components, choose according to the original format
|
||||
// (what would more likely be loaded if there were non-constant components).
|
||||
// If all components would be signed, use signed.
|
||||
// Textures with only constant components must still be bound to shaders for
|
||||
// various queries (such as filtering weights) not involving the color data
|
||||
// itself.
|
||||
if (((fetch.dword_0 >> 2) & 0b11111111) ==
|
||||
uint32_t(xenos::TextureSign::kSigned) * 0b01010101) {
|
||||
constants_sign = xenos::TextureSign::kSigned;
|
||||
|
|
|
@ -199,8 +199,10 @@ bool VulkanCommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
|
||||
// supports resolution scaling.
|
||||
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
||||
*register_file_, *memory_, &trace_writer_, *this);
|
||||
*register_file_, *memory_, &trace_writer_, 1, 1, *this);
|
||||
if (!render_target_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the render target cache");
|
||||
return false;
|
||||
|
@ -2199,8 +2201,8 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
|||
// more likely.
|
||||
depth_bias_slope_factor *=
|
||||
xenos::kPolygonOffsetScaleSubpixelUnit *
|
||||
float(std::max(render_target_cache_->GetResolutionScaleX(),
|
||||
render_target_cache_->GetResolutionScaleY()));
|
||||
float(std::max(render_target_cache_->draw_resolution_scale_x(),
|
||||
render_target_cache_->draw_resolution_scale_y()));
|
||||
// std::memcmp instead of != so in case of NaN, every draw won't be
|
||||
// invalidating it.
|
||||
dynamic_depth_bias_update_needed_ |=
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
@ -115,8 +116,10 @@ const VulkanRenderTargetCache::TransferModeInfo
|
|||
|
||||
VulkanRenderTargetCache::VulkanRenderTargetCache(
|
||||
const RegisterFile& register_file, const Memory& memory,
|
||||
TraceWriter* trace_writer, VulkanCommandProcessor& command_processor)
|
||||
: RenderTargetCache(register_file, memory, trace_writer),
|
||||
TraceWriter* trace_writer, uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y, VulkanCommandProcessor& command_processor)
|
||||
: RenderTargetCache(register_file, memory, trace_writer,
|
||||
draw_resolution_scale_x, draw_resolution_scale_y),
|
||||
command_processor_(command_processor) {}
|
||||
|
||||
VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); }
|
||||
|
@ -201,8 +204,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
|||
// maxStorageBufferRange.
|
||||
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
|
||||
provider,
|
||||
VkDeviceSize(xenos::kEdramSizeBytes * resolution_scale_x_ *
|
||||
resolution_scale_y_),
|
||||
VkDeviceSize(xenos::kEdramSizeBytes *
|
||||
(draw_resolution_scale_x() * draw_resolution_scale_y())),
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
ui::vulkan::util::MemoryPurpose::kDeviceLocal, edram_buffer_,
|
||||
|
@ -972,10 +975,10 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget(
|
|||
image_create_info.pNext = nullptr;
|
||||
image_create_info.flags = 0;
|
||||
image_create_info.imageType = VK_IMAGE_TYPE_2D;
|
||||
image_create_info.extent.width = key.GetWidth() * resolution_scale_x_;
|
||||
image_create_info.extent.width = key.GetWidth() * draw_resolution_scale_x();
|
||||
image_create_info.extent.height =
|
||||
GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples) *
|
||||
resolution_scale_y_;
|
||||
draw_resolution_scale_y();
|
||||
image_create_info.extent.depth = 1;
|
||||
image_create_info.mipLevels = 1;
|
||||
image_create_info.arrayLayers = 1;
|
||||
|
@ -1752,9 +1755,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
|
|||
// be done at texture fetch.
|
||||
|
||||
uint32_t tile_width_samples_scaled =
|
||||
xenos::kEdramTileWidthSamples * resolution_scale_x_;
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x();
|
||||
uint32_t tile_height_samples_scaled =
|
||||
xenos::kEdramTileHeightSamples * resolution_scale_y_;
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y();
|
||||
|
||||
// Convert the fragment coordinates to uint2.
|
||||
uint_vector_temp.clear();
|
||||
|
@ -1788,7 +1791,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
|
|||
uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X);
|
||||
uint32_t dest_tile_width_divide_scale, dest_tile_width_divide_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
resolution_scale_x_, dest_tile_width_divide_scale,
|
||||
draw_resolution_scale_x(), dest_tile_width_divide_scale,
|
||||
dest_tile_width_divide_shift);
|
||||
// Doing 16*16=32 multiplication, not 32*32=64.
|
||||
// TODO(Triang3l): Abstract this away, don't do 32*32 on Direct3D 12 too.
|
||||
|
@ -1808,7 +1811,11 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
|
|||
builder.makeUintConstant(tile_width_samples_scaled >>
|
||||
dest_sample_width_log2)));
|
||||
spv::Id dest_tile_index_y, dest_tile_pixel_y;
|
||||
if (resolution_scale_y_ == 3) {
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"VulkanRenderTargetCache EDRAM range ownership transfer shader "
|
||||
"generation supports Y draw resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y() == 3) {
|
||||
dest_tile_index_y = builder.createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint,
|
||||
builder.createBinOp(
|
||||
|
@ -1823,9 +1830,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
|
|||
builder.makeUintConstant(tile_height_samples_scaled >>
|
||||
dest_sample_height_log2)));
|
||||
} else {
|
||||
assert_true(resolution_scale_y_ <= 2);
|
||||
assert_true(draw_resolution_scale_y() <= 2);
|
||||
uint32_t dest_tile_height_pixels_log2 =
|
||||
(resolution_scale_y_ == 2 ? 5 : 4) - dest_sample_height_log2;
|
||||
(draw_resolution_scale_y() == 2 ? 5 : 4) - dest_sample_height_log2;
|
||||
dest_tile_index_y = builder.createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint, dest_pixel_y,
|
||||
builder.makeUintConstant(dest_tile_height_pixels_log2));
|
||||
|
@ -3967,13 +3974,13 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
|
|||
// Assuming the rectangle is already clamped by the setup function from the
|
||||
// common render target cache.
|
||||
resolve_clear_rect.rect.offset.x =
|
||||
int32_t(resolve_clear_rectangle->x_pixels * resolution_scale_x_);
|
||||
int32_t(resolve_clear_rectangle->x_pixels * draw_resolution_scale_x());
|
||||
resolve_clear_rect.rect.offset.y =
|
||||
int32_t(resolve_clear_rectangle->y_pixels * resolution_scale_y_);
|
||||
int32_t(resolve_clear_rectangle->y_pixels * draw_resolution_scale_y());
|
||||
resolve_clear_rect.rect.extent.width =
|
||||
resolve_clear_rectangle->width_pixels * resolution_scale_x_;
|
||||
resolve_clear_rectangle->width_pixels * draw_resolution_scale_x();
|
||||
resolve_clear_rect.rect.extent.height =
|
||||
resolve_clear_rectangle->height_pixels * resolution_scale_y_;
|
||||
resolve_clear_rectangle->height_pixels * draw_resolution_scale_y();
|
||||
resolve_clear_rect.baseArrayLayer = 0;
|
||||
resolve_clear_rect.layerCount = 1;
|
||||
}
|
||||
|
@ -4437,14 +4444,16 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
|
|||
++j) {
|
||||
const Transfer::Rectangle& stencil_clear_rectangle =
|
||||
transfer_stencil_clear_rectangles[j];
|
||||
stencil_clear_rect_write_ptr->rect.offset.x =
|
||||
int32_t(stencil_clear_rectangle.x_pixels * resolution_scale_x_);
|
||||
stencil_clear_rect_write_ptr->rect.offset.y =
|
||||
int32_t(stencil_clear_rectangle.y_pixels * resolution_scale_y_);
|
||||
stencil_clear_rect_write_ptr->rect.offset.x = int32_t(
|
||||
stencil_clear_rectangle.x_pixels * draw_resolution_scale_x());
|
||||
stencil_clear_rect_write_ptr->rect.offset.y = int32_t(
|
||||
stencil_clear_rectangle.y_pixels * draw_resolution_scale_y());
|
||||
stencil_clear_rect_write_ptr->rect.extent.width =
|
||||
stencil_clear_rectangle.width_pixels * resolution_scale_x_;
|
||||
stencil_clear_rectangle.width_pixels *
|
||||
draw_resolution_scale_x();
|
||||
stencil_clear_rect_write_ptr->rect.extent.height =
|
||||
stencil_clear_rectangle.height_pixels * resolution_scale_y_;
|
||||
stencil_clear_rectangle.height_pixels *
|
||||
draw_resolution_scale_y();
|
||||
stencil_clear_rect_write_ptr->baseArrayLayer = 0;
|
||||
stencil_clear_rect_write_ptr->layerCount = 1;
|
||||
++stencil_clear_rect_write_ptr;
|
||||
|
|
|
@ -87,6 +87,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
|
||||
VulkanRenderTargetCache(const RegisterFile& register_file,
|
||||
const Memory& memory, TraceWriter* trace_writer,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
VulkanCommandProcessor& command_processor);
|
||||
~VulkanRenderTargetCache();
|
||||
|
||||
|
@ -100,9 +102,6 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
// TODO(Triang3l): Fragment shader interlock.
|
||||
Path GetPath() const override { return Path::kHostRenderTargets; }
|
||||
|
||||
uint32_t GetResolutionScaleX() const override { return resolution_scale_x_; }
|
||||
uint32_t GetResolutionScaleY() const override { return resolution_scale_y_; }
|
||||
|
||||
bool Update(bool is_rasterization_done,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
|
@ -206,9 +205,6 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
|
||||
uint32_t resolution_scale_x_ = 1;
|
||||
uint32_t resolution_scale_y_ = 1;
|
||||
|
||||
// Accessible in fragment and compute shaders.
|
||||
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
|
||||
|
|
|
@ -905,29 +905,29 @@ constexpr bool IsSingleCopySampleSelected(CopySampleSelect copy_sample_select) {
|
|||
copy_sample_select <= CopySampleSelect::k3;
|
||||
}
|
||||
|
||||
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
||||
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
|
||||
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
||||
#define XE_GPU_MAKE_TEXTURE_SWIZZLE(x, y, z, w) \
|
||||
(((xe::gpu::xenos::XE_GPU_TEXTURE_SWIZZLE_##x) << 0) | \
|
||||
((xe::gpu::xenos::XE_GPU_TEXTURE_SWIZZLE_##y) << 3) | \
|
||||
((xe::gpu::xenos::XE_GPU_TEXTURE_SWIZZLE_##z) << 6) | \
|
||||
((xe::gpu::xenos::XE_GPU_TEXTURE_SWIZZLE_##w) << 9))
|
||||
typedef enum {
|
||||
XE_GPU_SWIZZLE_X = 0,
|
||||
XE_GPU_SWIZZLE_R = 0,
|
||||
XE_GPU_SWIZZLE_Y = 1,
|
||||
XE_GPU_SWIZZLE_G = 1,
|
||||
XE_GPU_SWIZZLE_Z = 2,
|
||||
XE_GPU_SWIZZLE_B = 2,
|
||||
XE_GPU_SWIZZLE_W = 3,
|
||||
XE_GPU_SWIZZLE_A = 3,
|
||||
XE_GPU_SWIZZLE_0 = 4,
|
||||
XE_GPU_SWIZZLE_1 = 5,
|
||||
XE_GPU_SWIZZLE_RGBA = XE_GPU_MAKE_SWIZZLE(R, G, B, A),
|
||||
XE_GPU_SWIZZLE_BGRA = XE_GPU_MAKE_SWIZZLE(B, G, R, A),
|
||||
XE_GPU_SWIZZLE_RGB1 = XE_GPU_MAKE_SWIZZLE(R, G, B, 1),
|
||||
XE_GPU_SWIZZLE_BGR1 = XE_GPU_MAKE_SWIZZLE(B, G, R, 1),
|
||||
XE_GPU_SWIZZLE_000R = XE_GPU_MAKE_SWIZZLE(0, 0, 0, R),
|
||||
XE_GPU_SWIZZLE_RRR1 = XE_GPU_MAKE_SWIZZLE(R, R, R, 1),
|
||||
XE_GPU_SWIZZLE_R111 = XE_GPU_MAKE_SWIZZLE(R, 1, 1, 1),
|
||||
XE_GPU_SWIZZLE_R000 = XE_GPU_MAKE_SWIZZLE(R, 0, 0, 0),
|
||||
} XE_GPU_SWIZZLE;
|
||||
XE_GPU_TEXTURE_SWIZZLE_X = 0,
|
||||
XE_GPU_TEXTURE_SWIZZLE_R = 0,
|
||||
XE_GPU_TEXTURE_SWIZZLE_Y = 1,
|
||||
XE_GPU_TEXTURE_SWIZZLE_G = 1,
|
||||
XE_GPU_TEXTURE_SWIZZLE_Z = 2,
|
||||
XE_GPU_TEXTURE_SWIZZLE_B = 2,
|
||||
XE_GPU_TEXTURE_SWIZZLE_W = 3,
|
||||
XE_GPU_TEXTURE_SWIZZLE_A = 3,
|
||||
XE_GPU_TEXTURE_SWIZZLE_0 = 4,
|
||||
XE_GPU_TEXTURE_SWIZZLE_1 = 5,
|
||||
XE_GPU_TEXTURE_SWIZZLE_RRRR = XE_GPU_MAKE_TEXTURE_SWIZZLE(R, R, R, R),
|
||||
XE_GPU_TEXTURE_SWIZZLE_RGGG = XE_GPU_MAKE_TEXTURE_SWIZZLE(R, G, G, G),
|
||||
XE_GPU_TEXTURE_SWIZZLE_RGBB = XE_GPU_MAKE_TEXTURE_SWIZZLE(R, G, B, B),
|
||||
XE_GPU_TEXTURE_SWIZZLE_RGBA = XE_GPU_MAKE_TEXTURE_SWIZZLE(R, G, B, A),
|
||||
XE_GPU_TEXTURE_SWIZZLE_BGRA = XE_GPU_MAKE_TEXTURE_SWIZZLE(B, G, R, A),
|
||||
XE_GPU_TEXTURE_SWIZZLE_0000 = XE_GPU_MAKE_TEXTURE_SWIZZLE(0, 0, 0, 0),
|
||||
} XE_GPU_TEXTURE_SWIZZLE;
|
||||
|
||||
inline uint16_t GpuSwap(uint16_t value, Endian endianness) {
|
||||
switch (endianness) {
|
||||
|
@ -999,6 +999,9 @@ enum class FetchConstantType : uint32_t {
|
|||
kVertex,
|
||||
};
|
||||
|
||||
constexpr uint32_t kTextureFetchConstantCount = 32;
|
||||
constexpr uint32_t kVertexFetchConstantCount = 3 * kTextureFetchConstantCount;
|
||||
|
||||
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
|
||||
union alignas(uint32_t) xe_gpu_vertex_fetch_t {
|
||||
struct {
|
||||
|
@ -1128,7 +1131,7 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
|
|||
};
|
||||
|
||||
uint32_t num_format : 1; // +0 dword_3 frac/int
|
||||
// xyzw, 3b each (XE_GPU_SWIZZLE)
|
||||
// xyzw, 3b each (XE_GPU_TEXTURE_SWIZZLE)
|
||||
uint32_t swizzle : 12; // +1
|
||||
int32_t exp_adjust : 6; // +13
|
||||
TextureFilter mag_filter : 2; // +19
|
||||
|
|
Loading…
Reference in New Issue