Merge branch 'master' into vulkan
This commit is contained in:
commit
aac28f19d1
|
@ -562,7 +562,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// Unsubmitted barrier batch.
|
||||
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
|
||||
|
||||
// <Resource, submission where requested>, sorted by the submission number.
|
||||
// <Submission where requested, resource>, sorted by the submission number.
|
||||
std::deque<std::pair<uint64_t, ID3D12Resource*>> resources_for_deletion_;
|
||||
|
||||
static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -179,85 +179,23 @@ class D3D12TextureCache final : public TextureCache {
|
|||
static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
|
||||
static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
|
||||
|
||||
enum class LoadMode {
|
||||
k8bpb,
|
||||
k16bpb,
|
||||
k32bpb,
|
||||
k64bpb,
|
||||
k128bpb,
|
||||
kR5G5B5A1ToB5G5R5A1,
|
||||
kR5G6B5ToB5G6R5,
|
||||
kR5G5B6ToB5G6R5WithRBGASwizzle,
|
||||
kR4G4B4A4ToB4G4R4A4,
|
||||
kGBGR8ToGRGB8,
|
||||
kGBGR8ToRGB8,
|
||||
kBGRG8ToRGBG8,
|
||||
kBGRG8ToRGB8,
|
||||
kR10G11B11ToRGBA16,
|
||||
kR10G11B11ToRGBA16SNorm,
|
||||
kR11G11B10ToRGBA16,
|
||||
kR11G11B10ToRGBA16SNorm,
|
||||
kDXT1ToRGBA8,
|
||||
kDXT3ToRGBA8,
|
||||
kDXT5ToRGBA8,
|
||||
kDXNToRG8,
|
||||
kDXT3A,
|
||||
kDXT3AAs1111ToBGRA4,
|
||||
kDXT5AToR8,
|
||||
kCTX1,
|
||||
kDepthUnorm,
|
||||
kDepthFloat,
|
||||
|
||||
kCount,
|
||||
|
||||
kUnknown = kCount
|
||||
};
|
||||
|
||||
struct LoadModeInfo {
|
||||
// Shader without resolution scaling.
|
||||
const void* shader;
|
||||
size_t shader_size;
|
||||
// Shader with resolution scaling, if available. These shaders are separate
|
||||
// so the majority of the textures are not affected by the code needed for
|
||||
// resolution scale support, and also to check if the format allows
|
||||
// resolution scaling.
|
||||
const void* shader_scaled;
|
||||
size_t shader_scaled_size;
|
||||
// Log2 of the sizes, in bytes, of the source (guest) SRV and the
|
||||
// destination (host) UAV accessed by the copying shader, since the shader
|
||||
// may copy multiple blocks per one invocation.
|
||||
uint32_t srv_bpe_log2;
|
||||
uint32_t uav_bpe_log2;
|
||||
// Number of bytes in a host resolution-scaled block (corresponding to a
|
||||
// guest block if not decompressing, or a host texel if decompressing)
|
||||
// written by the shader.
|
||||
uint32_t bytes_per_host_block;
|
||||
// Log2 of the number of guest resolution-scaled blocks along the X axis
|
||||
// loaded by a single thread shader group.
|
||||
uint32_t guest_x_blocks_per_thread_log2;
|
||||
|
||||
uint32_t GetGuestXBlocksPerGroupLog2() const {
|
||||
return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;
|
||||
}
|
||||
};
|
||||
|
||||
struct HostFormat {
|
||||
// Format info for the regular case.
|
||||
// DXGI format (typeless when different signedness or number representation
|
||||
// is used) for the texture resource.
|
||||
DXGI_FORMAT dxgi_format_resource;
|
||||
// DXGI format for unsigned normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_unorm;
|
||||
// The regular load mode, used when special modes (like signed-specific or
|
||||
// decompressing) aren't needed.
|
||||
LoadMode load_mode;
|
||||
DXGI_FORMAT dxgi_format_unsigned;
|
||||
// The regular load shader, used when special load shaders (like
|
||||
// signed-specific or decompressing) aren't needed.
|
||||
LoadShaderIndex load_shader;
|
||||
// DXGI format for signed normalized or unsigned/signed float SRV.
|
||||
DXGI_FORMAT dxgi_format_snorm;
|
||||
DXGI_FORMAT dxgi_format_signed;
|
||||
// If the signed version needs a different bit representation on the host,
|
||||
// this is the load mode for the signed version. Otherwise the regular
|
||||
// load_mode will be used for the signed version, and a single copy will be
|
||||
// created if both unsigned and signed are used.
|
||||
LoadMode load_mode_snorm;
|
||||
// this is the load shader for the signed version. Otherwise the regular
|
||||
// load_shader will be used for the signed version, and a single copy will
|
||||
// be created if both unsigned and signed are used.
|
||||
LoadShaderIndex load_shader_signed;
|
||||
|
||||
// Do NOT add integer DXGI formats to this - they are not filterable, can
|
||||
// only be read with Load, not Sample! If any game is seen using num_format
|
||||
|
@ -276,7 +214,7 @@ class D3D12TextureCache final : public TextureCache {
|
|||
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
|
||||
// used for DXN and DXT5A.
|
||||
DXGI_FORMAT dxgi_format_uncompressed;
|
||||
LoadMode decompress_mode;
|
||||
LoadShaderIndex load_shader_decompress;
|
||||
|
||||
// Mapping of Xenos swizzle components to DXGI format components.
|
||||
uint32_t swizzle;
|
||||
|
@ -440,13 +378,13 @@ class D3D12TextureCache final : public TextureCache {
|
|||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_unorm;
|
||||
: host_format.dxgi_format_unsigned;
|
||||
}
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
|
||||
return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight());
|
||||
}
|
||||
|
||||
static LoadMode GetLoadMode(TextureKey key);
|
||||
static LoadShaderIndex GetLoadShaderIndex(TextureKey key);
|
||||
|
||||
static constexpr bool AreDimensionsCompatible(
|
||||
xenos::FetchOpDimension binding_dimension,
|
||||
|
@ -528,14 +466,11 @@ class D3D12TextureCache final : public TextureCache {
|
|||
D3D12CommandProcessor& command_processor_;
|
||||
bool bindless_resources_used_;
|
||||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
Microsoft::WRL::ComPtr<ID3D12RootSignature> load_root_signature_;
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>,
|
||||
size_t(LoadMode::kCount)>
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, kLoadShaderCount>
|
||||
load_pipelines_;
|
||||
// Load pipelines for resolution-scaled resolve targets.
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>,
|
||||
size_t(LoadMode::kCount)>
|
||||
std::array<Microsoft::WRL::ComPtr<ID3D12PipelineState>, kLoadShaderCount>
|
||||
load_pipelines_scaled_;
|
||||
|
||||
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;
|
||||
|
|
|
@ -73,6 +73,80 @@ DEFINE_uint32(
|
|||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
const TextureCache::LoadShaderInfo
|
||||
TextureCache::load_shader_info_[kLoadShaderCount] = {
|
||||
// k8bpb
|
||||
{3, 4, 1, 4},
|
||||
// k16bpb
|
||||
{4, 4, 2, 4},
|
||||
// k32bpb
|
||||
{4, 4, 4, 3},
|
||||
// k64bpb
|
||||
{4, 4, 8, 2},
|
||||
// k128bpb
|
||||
{4, 4, 16, 1},
|
||||
// kR5G5B5A1ToB5G5R5A1
|
||||
{4, 4, 2, 4},
|
||||
// kR5G6B5ToB5G6R5
|
||||
{4, 4, 2, 4},
|
||||
// kR5G5B6ToB5G6R5WithRBGASwizzle
|
||||
{4, 4, 2, 4},
|
||||
// kRGBA4ToBGRA4
|
||||
{4, 4, 2, 4},
|
||||
// kRGBA4ToARGB4
|
||||
{4, 4, 2, 4},
|
||||
// kGBGR8ToGRGB8
|
||||
{4, 4, 4, 3},
|
||||
// kGBGR8ToRGB8
|
||||
{4, 4, 8, 3},
|
||||
// kBGRG8ToRGBG8
|
||||
{4, 4, 4, 3},
|
||||
// kBGRG8ToRGB8
|
||||
{4, 4, 8, 3},
|
||||
// kR10G11B11ToRGBA16
|
||||
{4, 4, 8, 3},
|
||||
// kR10G11B11ToRGBA16SNorm
|
||||
{4, 4, 8, 3},
|
||||
// kR11G11B10ToRGBA16
|
||||
{4, 4, 8, 3},
|
||||
// kR11G11B10ToRGBA16SNorm
|
||||
{4, 4, 8, 3},
|
||||
// kR16UNormToFloat
|
||||
{4, 4, 2, 4},
|
||||
// kR16SNormToFloat
|
||||
{4, 4, 2, 4},
|
||||
// kRG16UNormToFloat
|
||||
{4, 4, 4, 3},
|
||||
// kRG16SNormToFloat
|
||||
{4, 4, 4, 3},
|
||||
// kRGBA16UNormToFloat
|
||||
{4, 4, 8, 2},
|
||||
// kRGBA16SNormToFloat
|
||||
{4, 4, 8, 2},
|
||||
// kDXT1ToRGBA8
|
||||
{4, 4, 4, 2},
|
||||
// kDXT3ToRGBA8
|
||||
{4, 4, 4, 1},
|
||||
// kDXT5ToRGBA8
|
||||
{4, 4, 4, 1},
|
||||
// kDXNToRG8
|
||||
{4, 4, 2, 1},
|
||||
// kDXT3A
|
||||
{4, 4, 1, 2},
|
||||
// kDXT3AAs1111ToBGRA4
|
||||
{4, 4, 2, 2},
|
||||
// kDXT3AAs1111ToARGB4
|
||||
{4, 4, 2, 2},
|
||||
// kDXT5AToR8
|
||||
{4, 4, 1, 2},
|
||||
// kCTX1
|
||||
{4, 4, 2, 2},
|
||||
// kDepthUnorm
|
||||
{4, 4, 4, 3},
|
||||
// kDepthFloat
|
||||
{4, 4, 4, 3},
|
||||
};
|
||||
|
||||
TextureCache::TextureCache(const RegisterFile& register_file,
|
||||
SharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
|
|
|
@ -395,6 +395,69 @@ class TextureCache {
|
|||
uint32_t height_texels;
|
||||
};
|
||||
|
||||
static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
|
||||
static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
|
||||
|
||||
enum LoadShaderIndex {
|
||||
kLoadShaderIndex8bpb,
|
||||
kLoadShaderIndex16bpb,
|
||||
kLoadShaderIndex32bpb,
|
||||
kLoadShaderIndex64bpb,
|
||||
kLoadShaderIndex128bpb,
|
||||
kLoadShaderIndexR5G5B5A1ToB5G5R5A1,
|
||||
kLoadShaderIndexR5G6B5ToB5G6R5,
|
||||
kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle,
|
||||
kLoadShaderIndexRGBA4ToBGRA4,
|
||||
kLoadShaderIndexRGBA4ToARGB4,
|
||||
kLoadShaderIndexGBGR8ToGRGB8,
|
||||
kLoadShaderIndexGBGR8ToRGB8,
|
||||
kLoadShaderIndexBGRG8ToRGBG8,
|
||||
kLoadShaderIndexBGRG8ToRGB8,
|
||||
kLoadShaderIndexR10G11B11ToRGBA16,
|
||||
kLoadShaderIndexR10G11B11ToRGBA16SNorm,
|
||||
kLoadShaderIndexR11G11B10ToRGBA16,
|
||||
kLoadShaderIndexR11G11B10ToRGBA16SNorm,
|
||||
kLoadShaderIndexR16UNormToFloat,
|
||||
kLoadShaderIndexR16SNormToFloat,
|
||||
kLoadShaderIndexRG16UNormToFloat,
|
||||
kLoadShaderIndexRG16SNormToFloat,
|
||||
kLoadShaderIndexRGBA16UNormToFloat,
|
||||
kLoadShaderIndexRGBA16SNormToFloat,
|
||||
kLoadShaderIndexDXT1ToRGBA8,
|
||||
kLoadShaderIndexDXT3ToRGBA8,
|
||||
kLoadShaderIndexDXT5ToRGBA8,
|
||||
kLoadShaderIndexDXNToRG8,
|
||||
kLoadShaderIndexDXT3A,
|
||||
kLoadShaderIndexDXT3AAs1111ToBGRA4,
|
||||
kLoadShaderIndexDXT3AAs1111ToARGB4,
|
||||
kLoadShaderIndexDXT5AToR8,
|
||||
kLoadShaderIndexCTX1,
|
||||
kLoadShaderIndexDepthUnorm,
|
||||
kLoadShaderIndexDepthFloat,
|
||||
|
||||
kLoadShaderCount,
|
||||
kLoadShaderIndexUnknown = kLoadShaderCount,
|
||||
};
|
||||
|
||||
struct LoadShaderInfo {
|
||||
// Log2 of the sizes, in bytes, of the elements in the source (guest) and
|
||||
// the destination (host) buffer bindings accessed by the copying shader,
|
||||
// since the shader may copy multiple blocks per one invocation.
|
||||
uint32_t source_bpe_log2;
|
||||
uint32_t dest_bpe_log2;
|
||||
// Number of bytes in a host resolution-scaled block (corresponding to a
|
||||
// guest block if not decompressing, or a host texel if decompressing)
|
||||
// written by the shader.
|
||||
uint32_t bytes_per_host_block;
|
||||
// Log2 of the number of guest resolution-scaled blocks along the X axis
|
||||
// loaded by a single thread shader group.
|
||||
uint32_t guest_x_blocks_per_thread_log2;
|
||||
|
||||
uint32_t GetGuestXBlocksPerGroupLog2() const {
|
||||
return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr uint8_t kSwizzledSignsUnsigned =
|
||||
uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
|
||||
|
||||
|
@ -472,6 +535,11 @@ class TextureCache {
|
|||
// should be made.
|
||||
Texture* FindOrCreateTexture(TextureKey key);
|
||||
|
||||
static const LoadShaderInfo& GetLoadShaderInfo(
|
||||
LoadShaderIndex load_shader_index) {
|
||||
assert_true(load_shader_index < kLoadShaderCount);
|
||||
return load_shader_info_[load_shader_index];
|
||||
}
|
||||
bool LoadTextureData(Texture& texture);
|
||||
// Writes the texture data (for base, mips or both - but not neither) from the
|
||||
// shared memory or the scaled resolve memory. The shared memory management is
|
||||
|
@ -527,6 +595,8 @@ class TextureCache {
|
|||
uint32_t draw_resolution_scale_x_;
|
||||
uint32_t draw_resolution_scale_y_;
|
||||
|
||||
static const LoadShaderInfo load_shader_info_[kLoadShaderCount];
|
||||
|
||||
xe::global_critical_region global_critical_region_;
|
||||
// Bit vector storing whether each 4 KB physical memory page contains scaled
|
||||
// resolve data. uint32_t rather than uint64_t because parts of it can be sent
|
||||
|
|
|
@ -391,6 +391,12 @@ TextureGuestLayout GetGuestTextureLayout(
|
|||
// 2D 32x32-block tiles are laid out linearly in the texture.
|
||||
// Calculate the extent as ((all rows except for the last * pitch in
|
||||
// tiles + last row length in tiles) * bytes per tile).
|
||||
// FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is
|
||||
// 1024 bytes), offset for X + 32 minus offset for X is 512, not 1024,
|
||||
// but offset for X + 128 minus offset for X + 96 is 2560. Also, for
|
||||
// XY = 0...31, the extent of the addresses is 2560, not 1024. At 2bpb,
|
||||
// addressing repeats every 64x64, and the extent for XY = 0...31 is
|
||||
// 3072, not 2048.
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
(level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
|
||||
level_layout.row_pitch_bytes +
|
||||
|
|
|
@ -173,8 +173,8 @@ struct TextureGuestLayout {
|
|||
// If mip_max_level specified at calculation time is at least 1, the stored
|
||||
// mips are min(1, packed_mip_level) through min(mip_max_level,
|
||||
// packed_mip_level).
|
||||
Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||
Level mips[xenos::kTextureMaxMips];
|
||||
uint32_t mip_offsets_bytes[xenos::kTextureMaxMips];
|
||||
uint32_t mips_total_extent_bytes;
|
||||
uint32_t max_level;
|
||||
// UINT32_MAX if there's no packed mip tail.
|
||||
|
@ -207,6 +207,11 @@ void GetTextureTotalSize(xenos::DataDimension dimension,
|
|||
// Offset3D(X * 32, Y * 32, Z * 8) + Offset3D(x, y, z)
|
||||
// (true for negative offsets too).
|
||||
// - 2D 32x32 tiles are laid out linearly.
|
||||
// FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is 1024
|
||||
// bytes), offset for X + 32 minus offset for X is 512, not 1024, but offset for
|
||||
// X + 128 minus offset for X + 96 is 2560. Also, for XY = 0...31, the extent of
|
||||
// the addresses is 2560, not 1024. At 2bpb, addressing repeats every 64x64, and
|
||||
// the extent for XY = 0...31 is 3072, not 2048.
|
||||
// - 3D tiled texture slices 0:3 and 4:7 are stored separately in memory, in
|
||||
// non-overlapping ranges, but addressing in 4:7 is different than in 0:3.
|
||||
// - Addressing of blocks that are contiguous along X (for tiling/untiling of
|
||||
|
|
|
@ -1045,6 +1045,10 @@ constexpr uint32_t kTexture3DMaxWidthHeight = 1 << kTexture3DMaxWidthHeightLog2;
|
|||
constexpr uint32_t kTexture3DMaxDepthLog2 = 10;
|
||||
constexpr uint32_t kTexture3DMaxDepth = 1 << kTexture3DMaxDepthLog2;
|
||||
|
||||
constexpr uint32_t kTextureMaxMips =
|
||||
std::max(kTexture2DCubeMaxWidthHeightLog2, kTexture3DMaxWidthHeightLog2) +
|
||||
1;
|
||||
|
||||
// Tiled texture sizes are in 32x32 increments for 2D, 32x32x4 for 3D.
|
||||
// 2DTiledOffset(X * 32 + x, Y * 32 + y) ==
|
||||
// 2DTiledOffset(X * 32, Y * 32) + 2DTiledOffset(x, y)
|
||||
|
|
Loading…
Reference in New Issue