[GPU] Calculate, not store, texture load host X blocks per thread
This commit is contained in:
parent
888d5044e0
commit
6735dbd941
|
@ -373,87 +373,87 @@ const D3D12TextureCache::HostFormat D3D12TextureCache::host_formats_[64] = {
|
|||
const D3D12TextureCache::LoadModeInfo D3D12TextureCache::load_mode_info_[] = {
|
||||
{shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs),
|
||||
shaders::texture_load_8bpb_scaled_cs,
|
||||
sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4, 16},
|
||||
sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4},
|
||||
{shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs),
|
||||
shaders::texture_load_16bpb_scaled_cs,
|
||||
sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4, 16},
|
||||
sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4},
|
||||
{shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs),
|
||||
shaders::texture_load_32bpb_scaled_cs,
|
||||
sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3, 8},
|
||||
sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3},
|
||||
{shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs),
|
||||
shaders::texture_load_64bpb_scaled_cs,
|
||||
sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2, 4},
|
||||
sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2},
|
||||
{shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs),
|
||||
shaders::texture_load_128bpb_scaled_cs,
|
||||
sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1, 2},
|
||||
sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1},
|
||||
{shaders::texture_load_r5g5b5a1_b5g5r5a1_cs,
|
||||
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs),
|
||||
shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs,
|
||||
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4, 16},
|
||||
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4},
|
||||
{shaders::texture_load_r5g6b5_b5g6r5_cs,
|
||||
sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs),
|
||||
shaders::texture_load_r5g6b5_b5g6r5_scaled_cs,
|
||||
sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4, 16},
|
||||
sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4},
|
||||
{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs,
|
||||
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs),
|
||||
shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs,
|
||||
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs), 4, 4,
|
||||
2, 4, 16},
|
||||
2, 4},
|
||||
{shaders::texture_load_r4g4b4a4_b4g4r4a4_cs,
|
||||
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs),
|
||||
shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs,
|
||||
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4, 16},
|
||||
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4},
|
||||
{shaders::texture_load_gbgr8_grgb8_cs,
|
||||
sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3, 8},
|
||||
sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3},
|
||||
{shaders::texture_load_gbgr8_rgba8_cs,
|
||||
sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16},
|
||||
sizeof(shaders::texture_load_gbgr8_rgba8_cs), nullptr, 0, 4, 4, 8, 3},
|
||||
{shaders::texture_load_bgrg8_rgbg8_cs,
|
||||
sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3, 8},
|
||||
sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3},
|
||||
{shaders::texture_load_bgrg8_rgba8_cs,
|
||||
sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3, 16},
|
||||
sizeof(shaders::texture_load_bgrg8_rgba8_cs), nullptr, 0, 4, 4, 8, 3},
|
||||
{shaders::texture_load_r10g11b11_rgba16_cs,
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_cs),
|
||||
shaders::texture_load_r10g11b11_rgba16_scaled_cs,
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3, 8},
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3},
|
||||
{shaders::texture_load_r10g11b11_rgba16_snorm_cs,
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs),
|
||||
shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs,
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8, 3,
|
||||
8},
|
||||
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8,
|
||||
3},
|
||||
{shaders::texture_load_r11g11b10_rgba16_cs,
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_cs),
|
||||
shaders::texture_load_r11g11b10_rgba16_scaled_cs,
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3, 8},
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3},
|
||||
{shaders::texture_load_r11g11b10_rgba16_snorm_cs,
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs),
|
||||
shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs,
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8, 3,
|
||||
8},
|
||||
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 4, 8,
|
||||
3},
|
||||
{shaders::texture_load_dxt1_rgba8_cs,
|
||||
sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2, 16},
|
||||
sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2},
|
||||
{shaders::texture_load_dxt3_rgba8_cs,
|
||||
sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8},
|
||||
sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1},
|
||||
{shaders::texture_load_dxt5_rgba8_cs,
|
||||
sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1, 8},
|
||||
sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1},
|
||||
{shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs),
|
||||
nullptr, 0, 4, 4, 2, 1, 8},
|
||||
nullptr, 0, 4, 4, 2, 1},
|
||||
{shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs),
|
||||
nullptr, 0, 4, 4, 1, 2, 16},
|
||||
nullptr, 0, 4, 4, 1, 2},
|
||||
{shaders::texture_load_dxt3aas1111_bgra4_cs,
|
||||
sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2, 2,
|
||||
16},
|
||||
sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2,
|
||||
2},
|
||||
{shaders::texture_load_dxt5a_r8_cs,
|
||||
sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2, 16},
|
||||
sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2},
|
||||
{shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs),
|
||||
nullptr, 0, 4, 4, 2, 2, 16},
|
||||
nullptr, 0, 4, 4, 2, 2},
|
||||
{shaders::texture_load_depth_unorm_cs,
|
||||
sizeof(shaders::texture_load_depth_unorm_cs),
|
||||
shaders::texture_load_depth_unorm_scaled_cs,
|
||||
sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3, 8},
|
||||
sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3},
|
||||
{shaders::texture_load_depth_float_cs,
|
||||
sizeof(shaders::texture_load_depth_float_cs),
|
||||
shaders::texture_load_depth_float_scaled_cs,
|
||||
sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3, 8},
|
||||
sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3},
|
||||
};
|
||||
|
||||
D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file,
|
||||
|
@ -1537,14 +1537,16 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
|||
texture_resolution_scaled ? draw_resolution_scale_y() : 1;
|
||||
|
||||
// Get the host layout and the buffer.
|
||||
uint32_t host_block_width, host_block_height;
|
||||
if (host_formats_[uint32_t(guest_format)].is_block_compressed &&
|
||||
!IsDecompressionNeeded(guest_format, width, height)) {
|
||||
host_block_width = block_width;
|
||||
host_block_height = block_height;
|
||||
} else {
|
||||
host_block_width = 1;
|
||||
host_block_height = 1;
|
||||
bool host_block_compressed =
|
||||
host_formats_[uint32_t(guest_format)].is_block_compressed &&
|
||||
!IsDecompressionNeeded(guest_format, width, height);
|
||||
uint32_t host_block_width = host_block_compressed ? block_width : 1;
|
||||
uint32_t host_block_height = host_block_compressed ? block_height : 1;
|
||||
uint32_t host_x_blocks_per_thread =
|
||||
UINT32_C(1) << load_mode_info.guest_x_blocks_per_thread_log2;
|
||||
if (!host_block_compressed) {
|
||||
// Decompressing guest blocks.
|
||||
host_x_blocks_per_thread *= block_width;
|
||||
}
|
||||
UINT64 copy_buffer_size = 0;
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layout_base;
|
||||
|
@ -1589,7 +1591,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
|||
host_slice_layout_base.Footprint.RowPitch =
|
||||
xe::align(xe::round_up(host_slice_layout_base.Footprint.Width /
|
||||
host_block_width,
|
||||
load_mode_info.host_x_blocks_per_thread) *
|
||||
host_x_blocks_per_thread) *
|
||||
load_mode_info.bytes_per_host_block,
|
||||
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
|
||||
host_slice_size_base = xe::align(
|
||||
|
@ -1634,7 +1636,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
|||
host_slice_layout_mip.Footprint.RowPitch =
|
||||
xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width /
|
||||
host_block_width,
|
||||
load_mode_info.host_x_blocks_per_thread) *
|
||||
host_x_blocks_per_thread) *
|
||||
load_mode_info.bytes_per_host_block,
|
||||
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
|
||||
UINT64 host_slice_sizes_mip = xe::align(
|
||||
|
|
|
@ -235,10 +235,6 @@ class D3D12TextureCache final : public TextureCache {
|
|||
// Log2 of the number of guest resolution-scaled blocks along the X axis
|
||||
// loaded by a single compute shader thread.
|
||||
uint32_t guest_x_blocks_per_thread_log2;
|
||||
// Number of host blocks (or texels for uncompressed) along the X axis
|
||||
// written by a single compute shader thread - rows in the upload buffer are
|
||||
// padded to at least this amount.
|
||||
uint32_t host_x_blocks_per_thread;
|
||||
|
||||
uint32_t GetGuestXBlocksPerGroupLog2() const {
|
||||
return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;
|
||||
|
|
Loading…
Reference in New Issue