// Patch summary (explanatory text placed ahead of the first "diff --git"
// header; the patch text below is unchanged).
//
// d3d12_texture_cache.cc
//   * Every load_mode_info_ initializer drops its last value (16, 8, 4 or 2),
//     which initialized the host_x_blocks_per_thread member.
//   * LoadTextureDataFromResidentMemoryImpl now derives that quantity locally:
//     host_x_blocks_per_thread starts as
//     1 << load_mode_info.guest_x_blocks_per_thread_log2 and is multiplied by
//     block_width when host_block_compressed is false (guest blocks are being
//     decompressed). Both the base and the mip RowPitch computations round up
//     to this local instead of the removed member.
//   * host_block_width / host_block_height are selected through the single
//     host_block_compressed flag rather than an if/else.
//
// d3d12_texture_cache.h
//   * Removes the host_x_blocks_per_thread field and its comment from the
//     struct that declares guest_x_blocks_per_thread_log2.
//
// NOTE(review): for the plain NNbpb rows the removed constant equals
// 1 << guest_x_blocks_per_thread_log2 (e.g. 32bpb: log2 3, removed 8). For the
// decompressing rows (e.g. texture_load_dxt1_rgba8: log2 2, removed 16) the new
// formula reproduces the old value only if block_width is 4 for those formats
// - presumably true, but block_width is not visible here; confirm.
// NOTE(review): the line breaks of this patch appear to have been collapsed
// into spaces, so the hunk line counts (e.g. -373,87 +373,87) cannot be
// checked against the text as stored - confirm against the original commit.
diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index 334d479fc..84f245f63 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -373,87 +373,87 @@ const D3D12TextureCache::HostFormat D3D12TextureCache::host_formats_[64] = { const D3D12TextureCache::LoadModeInfo D3D12TextureCache::load_mode_info_[] = { {shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs), shaders::texture_load_8bpb_scaled_cs, - sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4, 16}, + sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4}, {shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs), shaders::texture_load_16bpb_scaled_cs, - sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4, 16}, + sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4}, {shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs), shaders::texture_load_32bpb_scaled_cs, - sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3, 8}, + sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3}, {shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs), shaders::texture_load_64bpb_scaled_cs, - sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2, 4}, + sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2}, {shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs), shaders::texture_load_128bpb_scaled_cs, - sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1, 2}, + sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1}, {shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs), shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, - sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4, 16}, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4}, {shaders::texture_load_r5g6b5_b5g6r5_cs, sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs), 
shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, - sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4, 16}, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4}, {shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs), shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs), 4, 4, - 2, 4, 16}, + 2, 4}, {shaders::texture_load_r4g4b4a4_b4g4r4a4_cs, sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs), shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs, - sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4, 16}, + sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4}, {shaders::texture_load_gbgr8_grgb8_cs, - sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3, 8}, + sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3}, {shaders::texture_load_gbgr8_rgba8_cs, - sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16}, + sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3}, {shaders::texture_load_bgrg8_rgbg8_cs, - sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3, 8}, + sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3}, {shaders::texture_load_bgrg8_rgba8_cs, - sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16}, + sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3}, {shaders::texture_load_r10g11b11_rgba16_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_cs), shaders::texture_load_r10g11b11_rgba16_scaled_cs, - sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3, 8}, + sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3}, {shaders::texture_load_r10g11b11_rgba16_snorm_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs), shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, - 
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8, 3, - 8}, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8, + 3}, {shaders::texture_load_r11g11b10_rgba16_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_cs), shaders::texture_load_r11g11b10_rgba16_scaled_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3, 8}, + sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3}, {shaders::texture_load_r11g11b10_rgba16_snorm_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs), shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8, 3, - 8}, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8, + 3}, {shaders::texture_load_dxt1_rgba8_cs, - sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2, 16}, + sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2}, {shaders::texture_load_dxt3_rgba8_cs, - sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8}, + sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1}, {shaders::texture_load_dxt5_rgba8_cs, - sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8}, + sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1}, {shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs), - nullptr, 0, 4, 4, 2, 1, 8}, + nullptr, 0, 4, 4, 2, 1}, {shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs), - nullptr, 0, 4, 4, 1, 2, 16}, + nullptr, 0, 4, 4, 1, 2}, {shaders::texture_load_dxt3aas1111_bgra4_cs, - sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2, 2, - 16}, + sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2, + 2}, {shaders::texture_load_dxt5a_r8_cs, - sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2, 16}, + sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2}, 
{shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs), - nullptr, 0, 4, 4, 2, 2, 16}, + nullptr, 0, 4, 4, 2, 2}, {shaders::texture_load_depth_unorm_cs, sizeof(shaders::texture_load_depth_unorm_cs), shaders::texture_load_depth_unorm_scaled_cs, - sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3, 8}, + sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3}, {shaders::texture_load_depth_float_cs, sizeof(shaders::texture_load_depth_float_cs), shaders::texture_load_depth_float_scaled_cs, - sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3, 8}, + sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3}, }; D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file, @@ -1537,14 +1537,16 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, texture_resolution_scaled ? draw_resolution_scale_y() : 1; // Get the host layout and the buffer. - uint32_t host_block_width, host_block_height; - if (host_formats_[uint32_t(guest_format)].is_block_compressed && - !IsDecompressionNeeded(guest_format, width, height)) { - host_block_width = block_width; - host_block_height = block_height; - } else { - host_block_width = 1; - host_block_height = 1; + bool host_block_compressed = + host_formats_[uint32_t(guest_format)].is_block_compressed && + !IsDecompressionNeeded(guest_format, width, height); + uint32_t host_block_width = host_block_compressed ? block_width : 1; + uint32_t host_block_height = host_block_compressed ? block_height : 1; + uint32_t host_x_blocks_per_thread = + UINT32_C(1) << load_mode_info.guest_x_blocks_per_thread_log2; + if (!host_block_compressed) { + // Decompressing guest blocks. 
+ host_x_blocks_per_thread *= block_width; } UINT64 copy_buffer_size = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layout_base; @@ -1589,7 +1591,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, host_slice_layout_base.Footprint.RowPitch = xe::align(xe::round_up(host_slice_layout_base.Footprint.Width / host_block_width, - load_mode_info.host_x_blocks_per_thread) * + host_x_blocks_per_thread) * load_mode_info.bytes_per_host_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); host_slice_size_base = xe::align( @@ -1634,7 +1636,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, host_slice_layout_mip.Footprint.RowPitch = xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width / host_block_width, - load_mode_info.host_x_blocks_per_thread) * + host_x_blocks_per_thread) * load_mode_info.bytes_per_host_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); UINT64 host_slice_sizes_mip = xe::align( diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index ba0717c97..9864e28a3 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -235,10 +235,6 @@ class D3D12TextureCache final : public TextureCache { // Log2 of the number of guest resolution-scaled blocks along the X axis // loaded by a single thread shader group. uint32_t guest_x_blocks_per_thread_log2; - // Number of host blocks (or texels for uncompressed) along the X axis - // written by a single compute shader thread - rows in the upload buffer are - // padded to at least this amount. - uint32_t host_x_blocks_per_thread; uint32_t GetGuestXBlocksPerGroupLog2() const { return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;