Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2022-05-24 22:34:40 +03:00
commit aac28f19d1
8 changed files with 653 additions and 521 deletions

View File

@ -562,7 +562,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// Unsubmitted barrier batch. // Unsubmitted barrier batch.
std::vector<D3D12_RESOURCE_BARRIER> barriers_; std::vector<D3D12_RESOURCE_BARRIER> barriers_;
// <Resource, submission where requested>, sorted by the submission number. // <Submission where requested, resource>, sorted by the submission number.
std::deque<std::pair<uint64_t, ID3D12Resource*>> resources_for_deletion_; std::deque<std::pair<uint64_t, ID3D12Resource*>> resources_for_deletion_;
static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024; static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;

File diff suppressed because it is too large Load Diff

View File

@ -179,85 +179,23 @@ class D3D12TextureCache final : public TextureCache {
static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2; static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5; static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
// Selects how guest texture data is loaded (copied or converted) into the host
// texture. HostFormat stores one of these for the regular case, for the signed
// case and for the decompression case.
// NOTE(review): the enumerator order presumably has to match the entries of
// load_mode_info_[] - confirm against its definition.
enum class LoadMode {
k8bpb,
k16bpb,
k32bpb,
k64bpb,
k128bpb,
kR5G5B5A1ToB5G5R5A1,
kR5G6B5ToB5G6R5,
kR5G5B6ToB5G6R5WithRBGASwizzle,
kR4G4B4A4ToB4G4R4A4,
kGBGR8ToGRGB8,
kGBGR8ToRGB8,
kBGRG8ToRGBG8,
kBGRG8ToRGB8,
kR10G11B11ToRGBA16,
kR10G11B11ToRGBA16SNorm,
kR11G11B10ToRGBA16,
kR11G11B10ToRGBA16SNorm,
kDXT1ToRGBA8,
kDXT3ToRGBA8,
kDXT5ToRGBA8,
kDXNToRG8,
kDXT3A,
kDXT3AAs1111ToBGRA4,
kDXT5AToR8,
kCTX1,
kDepthUnorm,
kDepthFloat,
// Number of real load modes; sizes the load_pipelines_ and
// load_pipelines_scaled_ arrays.
kCount,
// Sentinel equal to kCount, i.e. outside the range of the real modes.
kUnknown = kCount
};
struct LoadModeInfo {
  // Shader used when there is no resolution scaling, and its size.
  const void* shader;
  size_t shader_size;
  // Resolution-scaling variant of the shader, if the mode has one. It is kept
  // separate so most textures are not burdened with the scaling code, and so
  // its presence tells whether the format can be resolution-scaled.
  const void* shader_scaled;
  size_t shader_scaled_size;
  // Log2 of the element sizes, in bytes, of the guest source SRV and of the
  // host destination UAV accessed by the copying shader (one invocation may
  // copy several blocks).
  uint32_t srv_bpe_log2;
  uint32_t uav_bpe_log2;
  // Bytes per host resolution-scaled block written by the shader - a guest
  // block when not decompressing, a host texel when decompressing.
  uint32_t bytes_per_host_block;
  // Log2 of the number of guest resolution-scaled blocks along the X axis
  // that the shader loads; combined with the threads-per-group constant below
  // to get the per-group amount.
  uint32_t guest_x_blocks_per_thread_log2;

  // Log2 of the guest blocks along X covered by one whole thread group.
  uint32_t GetGuestXBlocksPerGroupLog2() const {
    const uint32_t per_group_log2 =
        guest_x_blocks_per_thread_log2 + kLoadGuestXThreadsPerGroupLog2;
    return per_group_log2;
  }
};
struct HostFormat { struct HostFormat {
// Format info for the regular case. // Format info for the regular case.
// DXGI format (typeless when different signedness or number representation // DXGI format (typeless when different signedness or number representation
// is used) for the texture resource. // is used) for the texture resource.
DXGI_FORMAT dxgi_format_resource; DXGI_FORMAT dxgi_format_resource;
// DXGI format for unsigned normalized or unsigned/signed float SRV. // DXGI format for unsigned normalized or unsigned/signed float SRV.
DXGI_FORMAT dxgi_format_unorm; DXGI_FORMAT dxgi_format_unsigned;
// The regular load mode, used when special modes (like signed-specific or // The regular load shader, used when special load shaders (like
// decompressing) aren't needed. // signed-specific or decompressing) aren't needed.
LoadMode load_mode; LoadShaderIndex load_shader;
// DXGI format for signed normalized or unsigned/signed float SRV. // DXGI format for signed normalized or unsigned/signed float SRV.
DXGI_FORMAT dxgi_format_snorm; DXGI_FORMAT dxgi_format_signed;
// If the signed version needs a different bit representation on the host, // If the signed version needs a different bit representation on the host,
// this is the load mode for the signed version. Otherwise the regular // this is the load shader for the signed version. Otherwise the regular
// load_mode will be used for the signed version, and a single copy will be // load_shader will be used for the signed version, and a single copy will
// created if both unsigned and signed are used. // be created if both unsigned and signed are used.
LoadMode load_mode_snorm; LoadShaderIndex load_shader_signed;
// Do NOT add integer DXGI formats to this - they are not filterable, can // Do NOT add integer DXGI formats to this - they are not filterable, can
// only be read with Load, not Sample! If any game is seen using num_format // only be read with Load, not Sample! If any game is seen using num_format
@ -276,7 +214,7 @@ class D3D12TextureCache final : public TextureCache {
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not // supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
// used for DXN and DXT5A. // used for DXN and DXT5A.
DXGI_FORMAT dxgi_format_uncompressed; DXGI_FORMAT dxgi_format_uncompressed;
LoadMode decompress_mode; LoadShaderIndex load_shader_decompress;
// Mapping of Xenos swizzle components to DXGI format components. // Mapping of Xenos swizzle components to DXGI format components.
uint32_t swizzle; uint32_t swizzle;
@ -440,13 +378,13 @@ class D3D12TextureCache final : public TextureCache {
const HostFormat& host_format = host_formats_[uint32_t(format)]; const HostFormat& host_format = host_formats_[uint32_t(format)];
return IsDecompressionNeeded(format, width, height) return IsDecompressionNeeded(format, width, height)
? host_format.dxgi_format_uncompressed ? host_format.dxgi_format_uncompressed
: host_format.dxgi_format_unorm; : host_format.dxgi_format_unsigned;
} }
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) { static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight()); return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight());
} }
static LoadMode GetLoadMode(TextureKey key); static LoadShaderIndex GetLoadShaderIndex(TextureKey key);
static constexpr bool AreDimensionsCompatible( static constexpr bool AreDimensionsCompatible(
xenos::FetchOpDimension binding_dimension, xenos::FetchOpDimension binding_dimension,
@ -528,14 +466,11 @@ class D3D12TextureCache final : public TextureCache {
D3D12CommandProcessor& command_processor_; D3D12CommandProcessor& command_processor_;
bool bindless_resources_used_; bool bindless_resources_used_;
static const LoadModeInfo load_mode_info_[];
Microsoft::WRL::ComPtr<ID3D12RootSignature> load_root_signature_; Microsoft::WRL::ComPtr<ID3D12RootSignature> load_root_signature_;
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, kLoadShaderCount>
size_t(LoadMode::kCount)>
load_pipelines_; load_pipelines_;
// Load pipelines for resolution-scaled resolve targets. // Load pipelines for resolution-scaled resolve targets.
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, kLoadShaderCount>
size_t(LoadMode::kCount)>
load_pipelines_scaled_; load_pipelines_scaled_;
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_; std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;

View File

@ -73,6 +73,80 @@ DEFINE_uint32(
namespace xe { namespace xe {
namespace gpu { namespace gpu {
// Static parameters of every load shader, indexed by LoadShaderIndex (one
// entry per enumerator, in declaration order - the comment above each entry
// names the enumerator it belongs to). Each initializer lists the
// LoadShaderInfo members in order: source_bpe_log2, dest_bpe_log2,
// bytes_per_host_block, guest_x_blocks_per_thread_log2.
const TextureCache::LoadShaderInfo
TextureCache::load_shader_info_[kLoadShaderCount] = {
// k8bpb
{3, 4, 1, 4},
// k16bpb
{4, 4, 2, 4},
// k32bpb
{4, 4, 4, 3},
// k64bpb
{4, 4, 8, 2},
// k128bpb
{4, 4, 16, 1},
// kR5G5B5A1ToB5G5R5A1
{4, 4, 2, 4},
// kR5G6B5ToB5G6R5
{4, 4, 2, 4},
// kR5G5B6ToB5G6R5WithRBGASwizzle
{4, 4, 2, 4},
// kRGBA4ToBGRA4
{4, 4, 2, 4},
// kRGBA4ToARGB4
{4, 4, 2, 4},
// kGBGR8ToGRGB8
{4, 4, 4, 3},
// kGBGR8ToRGB8
{4, 4, 8, 3},
// kBGRG8ToRGBG8
{4, 4, 4, 3},
// kBGRG8ToRGB8
{4, 4, 8, 3},
// kR10G11B11ToRGBA16
{4, 4, 8, 3},
// kR10G11B11ToRGBA16SNorm
{4, 4, 8, 3},
// kR11G11B10ToRGBA16
{4, 4, 8, 3},
// kR11G11B10ToRGBA16SNorm
{4, 4, 8, 3},
// kR16UNormToFloat
{4, 4, 2, 4},
// kR16SNormToFloat
{4, 4, 2, 4},
// kRG16UNormToFloat
{4, 4, 4, 3},
// kRG16SNormToFloat
{4, 4, 4, 3},
// kRGBA16UNormToFloat
{4, 4, 8, 2},
// kRGBA16SNormToFloat
{4, 4, 8, 2},
// kDXT1ToRGBA8
{4, 4, 4, 2},
// kDXT3ToRGBA8
{4, 4, 4, 1},
// kDXT5ToRGBA8
{4, 4, 4, 1},
// kDXNToRG8
{4, 4, 2, 1},
// kDXT3A
{4, 4, 1, 2},
// kDXT3AAs1111ToBGRA4
{4, 4, 2, 2},
// kDXT3AAs1111ToARGB4
{4, 4, 2, 2},
// kDXT5AToR8
{4, 4, 1, 2},
// kCTX1
{4, 4, 2, 2},
// kDepthUnorm
{4, 4, 4, 3},
// kDepthFloat
{4, 4, 4, 3},
};
TextureCache::TextureCache(const RegisterFile& register_file, TextureCache::TextureCache(const RegisterFile& register_file,
SharedMemory& shared_memory, SharedMemory& shared_memory,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_x,

View File

@ -395,6 +395,69 @@ class TextureCache {
uint32_t height_texels; uint32_t height_texels;
}; };
// Log2 of the number of threads along X in one load shader thread group.
// LoadShaderInfo::GetGuestXBlocksPerGroupLog2 adds it to the per-thread block
// count log2 to obtain the per-group one.
static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
// NOTE(review): presumably log2 of the number of guest block rows handled by
// one thread group - confirm against the load shaders and the dispatch code.
static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
// Index of a texture load shader. Values below kLoadShaderCount are used to
// index load_shader_info_ (see GetLoadShaderInfo), so the enumerator order
// has to stay in sync with the entries of that table.
enum LoadShaderIndex {
kLoadShaderIndex8bpb,
kLoadShaderIndex16bpb,
kLoadShaderIndex32bpb,
kLoadShaderIndex64bpb,
kLoadShaderIndex128bpb,
kLoadShaderIndexR5G5B5A1ToB5G5R5A1,
kLoadShaderIndexR5G6B5ToB5G6R5,
kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle,
kLoadShaderIndexRGBA4ToBGRA4,
kLoadShaderIndexRGBA4ToARGB4,
kLoadShaderIndexGBGR8ToGRGB8,
kLoadShaderIndexGBGR8ToRGB8,
kLoadShaderIndexBGRG8ToRGBG8,
kLoadShaderIndexBGRG8ToRGB8,
kLoadShaderIndexR10G11B11ToRGBA16,
kLoadShaderIndexR10G11B11ToRGBA16SNorm,
kLoadShaderIndexR11G11B10ToRGBA16,
kLoadShaderIndexR11G11B10ToRGBA16SNorm,
kLoadShaderIndexR16UNormToFloat,
kLoadShaderIndexR16SNormToFloat,
kLoadShaderIndexRG16UNormToFloat,
kLoadShaderIndexRG16SNormToFloat,
kLoadShaderIndexRGBA16UNormToFloat,
kLoadShaderIndexRGBA16SNormToFloat,
kLoadShaderIndexDXT1ToRGBA8,
kLoadShaderIndexDXT3ToRGBA8,
kLoadShaderIndexDXT5ToRGBA8,
kLoadShaderIndexDXNToRG8,
kLoadShaderIndexDXT3A,
kLoadShaderIndexDXT3AAs1111ToBGRA4,
kLoadShaderIndexDXT3AAs1111ToARGB4,
kLoadShaderIndexDXT5AToR8,
kLoadShaderIndexCTX1,
kLoadShaderIndexDepthUnorm,
kLoadShaderIndexDepthFloat,
// Number of real load shaders; the size of load_shader_info_.
kLoadShaderCount,
// Sentinel equal to kLoadShaderCount, i.e. outside the range of the real
// shader indices.
kLoadShaderIndexUnknown = kLoadShaderCount,
};
struct LoadShaderInfo {
  // Log2 of the element sizes, in bytes, of the guest source and of the host
  // destination buffer bindings that the copying shader accesses (one
  // invocation may copy several blocks).
  uint32_t source_bpe_log2;
  uint32_t dest_bpe_log2;
  // Bytes per host resolution-scaled block written by the shader - a guest
  // block when not decompressing, a host texel when decompressing.
  uint32_t bytes_per_host_block;
  // Log2 of the number of guest resolution-scaled blocks along the X axis
  // that the shader loads; combined with kLoadGuestXThreadsPerGroupLog2 below
  // to get the per-group amount.
  uint32_t guest_x_blocks_per_thread_log2;

  // Log2 of the guest blocks along X covered by one whole thread group.
  uint32_t GetGuestXBlocksPerGroupLog2() const {
    const uint32_t per_group_log2 =
        guest_x_blocks_per_thread_log2 + kLoadGuestXThreadsPerGroupLog2;
    return per_group_log2;
  }
};
static constexpr uint8_t kSwizzledSignsUnsigned = static constexpr uint8_t kSwizzledSignsUnsigned =
uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101); uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
@ -472,6 +535,11 @@ class TextureCache {
// should be made. // should be made.
Texture* FindOrCreateTexture(TextureKey key); Texture* FindOrCreateTexture(TextureKey key);
// Looks up the static parameters of a load shader in load_shader_info_. The
// index must be below kLoadShaderCount, which is checked by an assertion.
static const LoadShaderInfo& GetLoadShaderInfo(
    LoadShaderIndex load_shader_index) {
  assert_true(load_shader_index < kLoadShaderCount);
  const LoadShaderInfo& info = load_shader_info_[load_shader_index];
  return info;
}
bool LoadTextureData(Texture& texture); bool LoadTextureData(Texture& texture);
// Writes the texture data (for base, mips or both - but not neither) from the // Writes the texture data (for base, mips or both - but not neither) from the
// shared memory or the scaled resolve memory. The shared memory management is // shared memory or the scaled resolve memory. The shared memory management is
@ -527,6 +595,8 @@ class TextureCache {
uint32_t draw_resolution_scale_x_; uint32_t draw_resolution_scale_x_;
uint32_t draw_resolution_scale_y_; uint32_t draw_resolution_scale_y_;
static const LoadShaderInfo load_shader_info_[kLoadShaderCount];
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
// Bit vector storing whether each 4 KB physical memory page contains scaled // Bit vector storing whether each 4 KB physical memory page contains scaled
// resolve data. uint32_t rather than uint64_t because parts of it can be sent // resolve data. uint32_t rather than uint64_t because parts of it can be sent

View File

@ -391,6 +391,12 @@ TextureGuestLayout GetGuestTextureLayout(
// 2D 32x32-block tiles are laid out linearly in the texture. // 2D 32x32-block tiles are laid out linearly in the texture.
// Calculate the extent as ((all rows except for the last * pitch in // Calculate the extent as ((all rows except for the last * pitch in
// tiles + last row length in tiles) * bytes per tile). // tiles + last row length in tiles) * bytes per tile).
// FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is
// 1024 bytes), offset for X + 32 minus offset for X is 512, not 1024,
// but offset for X + 128 minus offset for X + 96 is 2560. Also, for
// XY = 0...31, the extent of the addresses is 2560, not 1024. At 2bpb,
// addressing repeats every 64x64, and the extent for XY = 0...31 is
// 3072, not 2048.
level_layout.array_slice_data_extent_bytes = level_layout.array_slice_data_extent_bytes =
(level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) * (level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
level_layout.row_pitch_bytes + level_layout.row_pitch_bytes +

View File

@ -173,8 +173,8 @@ struct TextureGuestLayout {
// If mip_max_level specified at calculation time is at least 1, the stored // If mip_max_level specified at calculation time is at least 1, the stored
// mips are min(1, packed_mip_level) through min(mip_max_level, // mips are min(1, packed_mip_level) through min(mip_max_level,
// packed_mip_level). // packed_mip_level).
Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; Level mips[xenos::kTextureMaxMips];
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; uint32_t mip_offsets_bytes[xenos::kTextureMaxMips];
uint32_t mips_total_extent_bytes; uint32_t mips_total_extent_bytes;
uint32_t max_level; uint32_t max_level;
// UINT32_MAX if there's no packed mip tail. // UINT32_MAX if there's no packed mip tail.
@ -207,6 +207,11 @@ void GetTextureTotalSize(xenos::DataDimension dimension,
// Offset3D(X * 32, Y * 32, Z * 8) + Offset3D(x, y, z) // Offset3D(X * 32, Y * 32, Z * 8) + Offset3D(x, y, z)
// (true for negative offsets too). // (true for negative offsets too).
// - 2D 32x32 tiles are laid out linearly. // - 2D 32x32 tiles are laid out linearly.
// FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is 1024
// bytes), offset for X + 32 minus offset for X is 512, not 1024, but offset for
// X + 128 minus offset for X + 96 is 2560. Also, for XY = 0...31, the extent of
// the addresses is 2560, not 1024. At 2bpb, addressing repeats every 64x64, and
// the extent for XY = 0...31 is 3072, not 2048.
// - 3D tiled texture slices 0:3 and 4:7 are stored separately in memory, in // - 3D tiled texture slices 0:3 and 4:7 are stored separately in memory, in
// non-overlapping ranges, but addressing in 4:7 is different than in 0:3. // non-overlapping ranges, but addressing in 4:7 is different than in 0:3.
// - Addressing of blocks that are contiguous along X (for tiling/untiling of // - Addressing of blocks that are contiguous along X (for tiling/untiling of

View File

@ -1045,6 +1045,10 @@ constexpr uint32_t kTexture3DMaxWidthHeight = 1 << kTexture3DMaxWidthHeightLog2;
constexpr uint32_t kTexture3DMaxDepthLog2 = 10; constexpr uint32_t kTexture3DMaxDepthLog2 = 10;
constexpr uint32_t kTexture3DMaxDepth = 1 << kTexture3DMaxDepthLog2; constexpr uint32_t kTexture3DMaxDepth = 1 << kTexture3DMaxDepthLog2;
// Size for per-mip arrays: the larger of the 2D/cube and the 3D maximum
// width/height log2, plus one.
constexpr uint32_t kTextureMaxMips =
    1 + std::max(kTexture2DCubeMaxWidthHeightLog2,
                 kTexture3DMaxWidthHeightLog2);
// Tiled texture sizes are in 32x32 increments for 2D, 32x32x4 for 3D. // Tiled texture sizes are in 32x32 increments for 2D, 32x32x4 for 3D.
// 2DTiledOffset(X * 32 + x, Y * 32 + y) == // 2DTiledOffset(X * 32 + x, Y * 32 + y) ==
// 2DTiledOffset(X * 32, Y * 32) + 2DTiledOffset(x, y) // 2DTiledOffset(X * 32, Y * 32) + 2DTiledOffset(x, y)