[GPU] Calculate, not store, texture load host X blocks per thread

This commit is contained in:
Triang3l 2022-05-22 21:21:54 +03:00
parent 888d5044e0
commit 6735dbd941
2 changed files with 42 additions and 44 deletions

View File

@ -373,87 +373,87 @@ const D3D12TextureCache::HostFormat D3D12TextureCache::host_formats_[64] = {
const D3D12TextureCache::LoadModeInfo D3D12TextureCache::load_mode_info_[] = {
{shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs),
shaders::texture_load_8bpb_scaled_cs,
sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4, 16},
sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4},
{shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs),
shaders::texture_load_16bpb_scaled_cs,
sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4, 16},
sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4},
{shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs),
shaders::texture_load_32bpb_scaled_cs,
sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3, 8},
sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3},
{shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs),
shaders::texture_load_64bpb_scaled_cs,
sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2, 4},
sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2},
{shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs),
shaders::texture_load_128bpb_scaled_cs,
sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1, 2},
sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1},
{shaders::texture_load_r5g5b5a1_b5g5r5a1_cs,
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs),
shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs,
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4, 16},
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4},
{shaders::texture_load_r5g6b5_b5g6r5_cs,
sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs),
shaders::texture_load_r5g6b5_b5g6r5_scaled_cs,
sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4, 16},
sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4},
{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs,
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs),
shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs,
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs), 4, 4,
2, 4, 16},
2, 4},
{shaders::texture_load_r4g4b4a4_b4g4r4a4_cs,
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs),
shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs,
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4, 16},
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4},
{shaders::texture_load_gbgr8_grgb8_cs,
sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3, 8},
sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3},
{shaders::texture_load_gbgr8_rgba8_cs,
sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16},
sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3},
{shaders::texture_load_bgrg8_rgbg8_cs,
sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3, 8},
sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3},
{shaders::texture_load_bgrg8_rgba8_cs,
sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16},
sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3},
{shaders::texture_load_r10g11b11_rgba16_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_cs),
shaders::texture_load_r10g11b11_rgba16_scaled_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3, 8},
sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3},
{shaders::texture_load_r10g11b11_rgba16_snorm_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs),
shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8, 3,
8},
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8,
3},
{shaders::texture_load_r11g11b10_rgba16_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_cs),
shaders::texture_load_r11g11b10_rgba16_scaled_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3, 8},
sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3},
{shaders::texture_load_r11g11b10_rgba16_snorm_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs),
shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8, 3,
8},
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8,
3},
{shaders::texture_load_dxt1_rgba8_cs,
sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2, 16},
sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2},
{shaders::texture_load_dxt3_rgba8_cs,
sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8},
sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1},
{shaders::texture_load_dxt5_rgba8_cs,
sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8},
sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1},
{shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs),
nullptr, 0, 4, 4, 2, 1, 8},
nullptr, 0, 4, 4, 2, 1},
{shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs),
nullptr, 0, 4, 4, 1, 2, 16},
nullptr, 0, 4, 4, 1, 2},
{shaders::texture_load_dxt3aas1111_bgra4_cs,
sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2, 2,
16},
sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2,
2},
{shaders::texture_load_dxt5a_r8_cs,
sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2, 16},
sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2},
{shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs),
nullptr, 0, 4, 4, 2, 2, 16},
nullptr, 0, 4, 4, 2, 2},
{shaders::texture_load_depth_unorm_cs,
sizeof(shaders::texture_load_depth_unorm_cs),
shaders::texture_load_depth_unorm_scaled_cs,
sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3, 8},
sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3},
{shaders::texture_load_depth_float_cs,
sizeof(shaders::texture_load_depth_float_cs),
shaders::texture_load_depth_float_scaled_cs,
sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3, 8},
sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3},
};
D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file,
@ -1537,14 +1537,16 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
texture_resolution_scaled ? draw_resolution_scale_y() : 1;
// Get the host layout and the buffer.
uint32_t host_block_width, host_block_height;
if (host_formats_[uint32_t(guest_format)].is_block_compressed &&
!IsDecompressionNeeded(guest_format, width, height)) {
host_block_width = block_width;
host_block_height = block_height;
} else {
host_block_width = 1;
host_block_height = 1;
bool host_block_compressed =
host_formats_[uint32_t(guest_format)].is_block_compressed &&
!IsDecompressionNeeded(guest_format, width, height);
uint32_t host_block_width = host_block_compressed ? block_width : 1;
uint32_t host_block_height = host_block_compressed ? block_height : 1;
uint32_t host_x_blocks_per_thread =
UINT32_C(1) << load_mode_info.guest_x_blocks_per_thread_log2;
if (!host_block_compressed) {
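// Host blocks are single texels here (host_block_width is 1), so each guest
// block spans block_width of them along X, e.g. when decompressing guest
// blocks.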
host_x_blocks_per_thread *= block_width;
}
UINT64 copy_buffer_size = 0;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layout_base;
@ -1589,7 +1591,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
host_slice_layout_base.Footprint.RowPitch =
xe::align(xe::round_up(host_slice_layout_base.Footprint.Width /
host_block_width,
load_mode_info.host_x_blocks_per_thread) *
host_x_blocks_per_thread) *
load_mode_info.bytes_per_host_block,
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
host_slice_size_base = xe::align(
@ -1634,7 +1636,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
host_slice_layout_mip.Footprint.RowPitch =
xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width /
host_block_width,
load_mode_info.host_x_blocks_per_thread) *
host_x_blocks_per_thread) *
load_mode_info.bytes_per_host_block,
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
UINT64 host_slice_sizes_mip = xe::align(

View File

@ -235,10 +235,6 @@ class D3D12TextureCache final : public TextureCache {
// Log2 of the number of guest resolution-scaled blocks along the X axis
// loaded by a single compute shader thread.
uint32_t guest_x_blocks_per_thread_log2;
// Number of host blocks (or texels for uncompressed) along the X axis
// written by a single compute shader thread - rows in the upload buffer are
// padded to at least this amount.
uint32_t host_x_blocks_per_thread;
uint32_t GetGuestXBlocksPerGroupLog2() const {
return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;