diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index c34343670..47c47e032 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1147,7 +1147,8 @@ bool D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported( } bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( - uint32_t start_unscaled, uint32_t length_unscaled) { + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (length_unscaled == 0) { @@ -1162,8 +1163,12 @@ bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t first_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; - uint64_t last_scaled = uint64_t(start_unscaled + (length_unscaled - 1)) * - draw_resolution_scale_area; + uint64_t length_scaled_alignment_bits = + (UINT64_C(1) << length_scaled_alignment_log2) - 1; + uint64_t last_scaled = (uint64_t(start_unscaled + (length_unscaled - 1)) * + draw_resolution_scale_area + + length_scaled_alignment_bits) & + ~length_scaled_alignment_bits; const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); @@ -1273,7 +1278,8 @@ bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( } bool D3D12TextureCache::MakeScaledResolveRangeCurrent( - uint32_t start_unscaled, uint32_t length_unscaled) { + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (!length_unscaled || start_unscaled >= SharedMemory::kBufferSize || @@ -1286,8 +1292,12 @@ bool D3D12TextureCache::MakeScaledResolveRangeCurrent( uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; + uint64_t length_scaled_alignment_bits = + (UINT64_C(1) << length_scaled_alignment_log2) - 1; uint64_t length_scaled = - uint64_t(length_unscaled) * draw_resolution_scale_area; + (uint64_t(length_unscaled) * draw_resolution_scale_area + + length_scaled_alignment_bits) & + ~length_scaled_alignment_bits; uint64_t last_scaled = start_scaled + (length_scaled - 1); // Get one or two buffers that can hold the whole range. @@ -1855,7 +1865,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (texture_resolution_scaled && (is_base || !scaled_mips_source_set_up)) { uint32_t guest_size_unscaled = is_base ? d3d12_texture.GetGuestBaseSize() : d3d12_texture.GetGuestMipsSize(); - if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled)) { + if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled, + load_shader_info.source_bpe_log2)) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index 9b22b1e9b..6a14948fe 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -130,14 +130,16 @@ class D3D12TextureCache final : public TextureCache { uint32_t& scale_x, uint32_t& scale_y, const ui::d3d12::D3D12Provider& provider); // Ensures the tiles backing the range in the buffers are allocated. - bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled, - uint32_t length_unscaled) override; + bool EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) override; // Makes the specified range of up to 1-2 GB currently accessible on the GPU. // One draw call can access only at most one range - the same memory is // accessible through different buffers based on the range needed, so aliasing // barriers are required. bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled, - uint32_t length_unscaled); + uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0); // These functions create a view of the range specified in the last successful // MakeScaledResolveRangeCurrent call because that function must be called // before this. diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 030ae2be7..18fac01d9 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -656,6 +656,13 @@ bool TextureCache::LoadTextureData(Texture& texture) { TextureKey texture_key = texture.key(); + // Implementation may load multiple blocks at once via accesses of up to 128 + // bits (R32G32B32A32_UINT), so aligning the size to this value to make sure + // if the texture is small (especially if it's linear), the last blocks won't + // be cut off (hosts may return 0, 0, 0, 0 for the whole R32G32B32A32_UINT + // access for the non-16-aligned tail even if 1...15 bytes are actually + // provided for it). + // Request uploading of the texture data to the shared memory. // This is also necessary when resolution scaling is used - the texture cache // relies on shared memory for invalidation of both unscaled and scaled @@ -666,7 +673,8 @@ bool TextureCache::LoadTextureData(Texture& texture) { bool base_resolved = texture.GetBaseResolved(); if (base_outdated) { if (!shared_memory().RequestRange( - texture_key.base_page << 12, texture.GetGuestBaseSize(), + texture_key.base_page << 12, + xe::align(texture.GetGuestBaseSize(), UINT32_C(16)), texture_key.scaled_resolve ? nullptr : &base_resolved)) { return false; } @@ -674,7 +682,8 @@ bool TextureCache::LoadTextureData(Texture& texture) { bool mips_resolved = texture.GetMipsResolved(); if (mips_outdated) { if (!shared_memory().RequestRange( - texture_key.mip_page << 12, texture.GetGuestMipsSize(), + texture_key.mip_page << 12, + xe::align(texture.GetGuestMipsSize(), UINT32_C(16)), texture_key.scaled_resolve ? nullptr : &mips_resolved)) { return false; } @@ -685,11 +694,11 @@ bool TextureCache::LoadTextureData(Texture& texture) { // by an actual resolve, but is still included in the texture size, so the // GPU won't be trying to access unmapped memory. if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12, - texture.GetGuestBaseSize())) { + texture.GetGuestBaseSize(), 4)) { return false; } if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12, - texture.GetGuestMipsSize())) { + texture.GetGuestMipsSize(), 4)) { return false; } } diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h index 510fa2d86..a8392071e 100644 --- a/src/xenia/gpu/texture_cache.h +++ b/src/xenia/gpu/texture_cache.h @@ -82,8 +82,9 @@ class TextureCache { void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled); // Ensures the memory backing the range in the scaled resolve address space is // allocated and returns whether it is. - virtual bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled, - uint32_t length_unscaled) { + virtual bool EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) { return false; }