// xenia-canary/src/xenia/gpu/texture_cache.h
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_TEXTURE_CACHE_H_
#define XENIA_GPU_TEXTURE_CACHE_H_
#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_map>
#include "xenia/base/assert.h"
#include "xenia/base/hash.h"
#include "xenia/base/math.h"
#include "xenia/base/mutex.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shared_memory.h"
#include "xenia/gpu/texture_util.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
// Manages host copies of guest textures, performing untiling, format and endian
// conversion of textures stored in the shared memory, and also handling
// invalidation.
//
// Mipmaps are treated the following way, according to the GPU hang message
// found in game executables explaining the valid usage of BaseAddress when
// streaming the largest LOD (it says games should not use 0 as the base address
// when the largest LOD isn't loaded, but rather, either allocate a valid
// address for it or make it the same as mip_address):
// - If the texture has a base address, but no mip address, it's not mipmapped -
// the host texture has only the largest level too.
// - If the texture has different non-zero base address and mip address, a host
// texture with mip_max_level+1 mipmaps is created - mip_min_level is ignored
// and treated purely as sampler state because there are tfetch instructions
// working directly with LOD values - including fetching with an explicit LOD.
// However, the max level is not ignored because any mip count can be
// specified when creating a texture, and another texture may be placed after
// the last one.
// - If the texture has a mip address, but the base address is 0 or the same as
// the mip address, a mipmapped texture is created, but min/max LOD is clamped
// to the lower bound of 1 - the game is expected to do that anyway until the
// largest LOD is loaded.
// TODO(Triang3l): Attach the largest LOD to existing textures with a valid
// mip_address but no base ever used yet (no base_address) to save memory
// because textures are streamed this way anyway.
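//
// For illustration, how the cases above map to host textures (the addresses
// here are hypothetical, not taken from a real title):
// - base_address = 0xA000, mip_address = 0: host texture with only level 0.
// - base_address = 0xA000, mip_address = 0xB000, mip_max_level = 3: host
//   texture with 4 levels; mip_min_level only affects the sampler state, not
//   the level count.
// - base_address = 0 (or equal to mip_address), mip_address = 0xB000:
//   mipmapped host texture with min/max LOD clamped to at least 1.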
class TextureCache {
public:
// Hard limit, originating from the half-pixel offset filling hack in the
// resolve shaders (which fills at most 3 pixels), from the bit counts used
// for passing the scale to shaders, and from the fact that the full 490 MB
// resolution-scaled EDRAM buffer still fits within the minimum Direct3D 12
// requirement of 128 * 2^20 texels in a single buffer binding (counted as R32
// for a byte address buffer).
static constexpr uint32_t kMaxDrawResolutionScaleAlongAxis = 7;
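// For reference: at the maximum 7x7 scale, the 10 MB EDRAM becomes
// 10 MB * 7 * 7 = 490 MB, which still fits in the 128 * 2^20 R32 texels
// (512 MB) that Direct3D 12 guarantees for a single buffer binding, while a
// scale of 8 along one axis (10 MB * 8 * 7 = 560 MB) would already exceed it.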
TextureCache(const TextureCache& texture_cache) = delete;
TextureCache& operator=(const TextureCache& texture_cache) = delete;
virtual ~TextureCache();
// Returns whether the actual scale is not smaller than the requested one.
static bool GetConfigDrawResolutionScale(uint32_t& x_out, uint32_t& y_out);
uint32_t draw_resolution_scale_x() const { return draw_resolution_scale_x_; }
uint32_t draw_resolution_scale_y() const { return draw_resolution_scale_y_; }
divisors::MagicDiv draw_resolution_scale_x_divisor() const {
return draw_resolution_scale_x_divisor_;
}
divisors::MagicDiv draw_resolution_scale_y_divisor() const {
return draw_resolution_scale_y_divisor_;
}
bool IsDrawResolutionScaled() const {
return draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1;
}
virtual void ClearCache();
virtual void CompletedSubmissionUpdated(uint64_t completed_submission_index);
virtual void BeginSubmission(uint64_t new_submission_index);
virtual void BeginFrame();
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
// Ensures the memory backing the range in the scaled resolve address space is
// allocated and returns whether it is.
virtual bool EnsureScaledResolveMemoryCommitted(
uint32_t start_unscaled, uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2 = 0) {
return false;
}
static uint32_t GuestToHostSwizzle(uint32_t guest_swizzle,
uint32_t host_format_swizzle);
void TextureFetchConstantWritten(uint32_t index) {
texture_bindings_in_sync_ &= ~(UINT32_C(1) << index);
}
void TextureFetchConstantsWritten(uint32_t first_index, uint32_t last_index) {
// Generate a mask of all bits below first_index, and XOR it with a mask of
// all bits up to and including last_index - this produces a mask covering
// exactly the bits from first_index through last_index inclusive.
uint32_t res = ((1U << first_index) - 1) ^
static_cast<uint32_t>((1ULL << (last_index + 1)) - 1ULL);
texture_bindings_in_sync_ &= ~res;
}
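// Worked example for TextureFetchConstantsWritten: first_index = 1 and
// last_index = 3 give ((1 << 1) - 1) = 0b0001 and ((1 << 4) - 1) = 0b1111;
// 0b0001 ^ 0b1111 = 0b1110, so exactly bits 1...3 are cleared from
// texture_bindings_in_sync_.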
virtual void RequestTextures(uint32_t used_texture_mask);
// "ActiveTexture" means as of the latest RequestTextures call.
uint32_t GetActiveTextureHostSwizzle(uint32_t fetch_constant_index) const {
const TextureBinding* binding =
GetValidTextureBinding(fetch_constant_index);
return binding ? binding->host_swizzle : xenos::XE_GPU_TEXTURE_SWIZZLE_0000;
}
uint8_t GetActiveTextureSwizzledSigns(uint32_t fetch_constant_index) const {
const TextureBinding* binding =
GetValidTextureBinding(fetch_constant_index);
return binding ? binding->swizzled_signs : kSwizzledSignsUnsigned;
}
bool IsActiveTextureResolved(uint32_t fetch_constant_index) const {
const TextureBinding* binding =
GetValidTextureBinding(fetch_constant_index);
if (!binding) {
return false;
}
return (binding->texture && binding->texture->IsResolved()) ||
(binding->texture_signed && binding->texture_signed->IsResolved());
}
template <swcache::PrefetchTag tag>
void PrefetchTextureBinding(uint32_t fetch_constant_index) const {
// The TextureBinding structure is 0x28 bytes, so a binding may cross a cache
// line boundary - prefetch the start of the next binding as well.
swcache::Prefetch<tag>(&texture_bindings_[fetch_constant_index]);
swcache::Prefetch<tag>(&texture_bindings_[fetch_constant_index + 1]);
}
protected:
struct TextureKey {
// Dimensions minus 1 are stored similarly to how they're stored in fetch
// constants so fewer bits can be used, while the maximum size (8192 for 2D)
// can still be encoded (an 8192-wide sky texture is used in 4D530910).
// Physical 4 KB page with the base mip level, disregarding A/C/E address
// range prefix.
uint32_t base_page : 17; // 17 total
xenos::DataDimension dimension : 2; // 19
uint32_t width_minus_1 : 13; // 32
uint32_t height_minus_1 : 13; // 45
uint32_t tiled : 1; // 46
uint32_t packed_mips : 1; // 47
// Physical 4 KB page with mip 1 and smaller.
uint32_t mip_page : 17; // 64
// (Layers for stacked and 3D, 6 for cube, 1 for other dimensions) - 1.
uint32_t depth_or_array_size_minus_1 : 10; // 74
uint32_t pitch : 9; // 83
uint32_t mip_max_level : 4; // 87
xenos::TextureFormat format : 6; // 93
xenos::Endian endianness : 2; // 95
// Whether this texture is signed and has a different host representation
// than an unsigned view of the same guest texture.
uint32_t signed_separate : 1; // 96
// Whether this texture is a resolution-scaled resolve target.
uint32_t scaled_resolve : 1; // 97
// Least important in ==, so placed last.
uint32_t is_valid : 1; // 98
TextureKey() { MakeInvalid(); }
TextureKey(const TextureKey& key) {
std::memcpy(this, &key, sizeof(*this));
}
TextureKey& operator=(const TextureKey& key) {
std::memcpy(this, &key, sizeof(*this));
return *this;
}
void MakeInvalid() {
// Zero everything, including the padding, for a stable hash.
std::memset(this, 0, sizeof(*this));
}
using Hasher = xe::hash::XXHasher<TextureKey>;
bool operator==(const TextureKey& key) const {
return !std::memcmp(this, &key, sizeof(*this));
}
bool operator!=(const TextureKey& key) const { return !(*this == key); }
uint32_t GetWidth() const { return width_minus_1 + 1; }
uint32_t GetHeight() const { return height_minus_1 + 1; }
uint32_t GetDepthOrArraySize() const {
return depth_or_array_size_minus_1 + 1;
}
texture_util::TextureGuestLayout GetGuestLayout() const {
return texture_util::GetGuestTextureLayout(
dimension, pitch, GetWidth(), GetHeight(), GetDepthOrArraySize(),
tiled, format, packed_mips, base_page != 0, mip_max_level);
}
static const char* GetLogDimensionName(xenos::DataDimension dimension);
const char* GetLogDimensionName() const {
return GetLogDimensionName(dimension);
}
void LogAction(const char* action) const;
};
class Texture {
public:
Texture(const Texture& texture) = delete;
Texture& operator=(const Texture& texture) = delete;
virtual ~Texture();
TextureCache& texture_cache() const { return texture_cache_; }
const TextureKey& key() const { return key_; }
const texture_util::TextureGuestLayout& guest_layout() const {
return guest_layout_;
}
uint32_t GetGuestBaseSize() const {
return guest_layout().base.level_data_extent_bytes;
}
uint32_t GetGuestMipsSize() const {
return guest_layout().mips_total_extent_bytes;
}
uint64_t GetHostMemoryUsage() const { return host_memory_usage_; }
uint64_t last_usage_submission_index() const {
return last_usage_submission_index_;
}
uint64_t last_usage_time() const { return last_usage_time_; }
bool GetBaseResolved() const { return base_resolved_; }
void SetBaseResolved(bool base_resolved) {
assert_false(!base_resolved && key().scaled_resolve);
base_resolved_ = base_resolved;
}
bool GetMipsResolved() const { return mips_resolved_; }
void SetMipsResolved(bool mips_resolved) {
assert_false(!mips_resolved && key().scaled_resolve);
mips_resolved_ = mips_resolved;
}
bool IsResolved() const { return base_resolved_ || mips_resolved_; }
bool base_outdated(const global_unique_lock_type& global_lock) const {
return base_outdated_;
}
bool mips_outdated(const global_unique_lock_type& global_lock) const {
return mips_outdated_;
}
void MakeUpToDateAndWatch(const global_unique_lock_type& global_lock);
void WatchCallback(const global_unique_lock_type& global_lock, bool is_mip);
// For LRU caching - updates the last usage frame and moves the texture to
// the end of the usage queue. Must be called any time the texture is
// referenced by any GPU work in the implementation to make sure it's not
// destroyed while still in use.
void MarkAsUsed();
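// A hypothetical usage sketch for an implementation's draw path (names here
// are illustrative only):
//   TextureCache::Texture* texture = FindOrCreateTexture(key);
//   if (texture) {
//     texture->MarkAsUsed();  // Before recording any GPU work using it.
//   }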
void LogAction(const char* action) const;
protected:
explicit Texture(TextureCache& texture_cache, const TextureKey& key);
void SetHostMemoryUsage(uint64_t new_host_memory_usage) {
texture_cache_.UpdateTexturesTotalHostMemoryUsage(new_host_memory_usage,
host_memory_usage_);
host_memory_usage_ = new_host_memory_usage;
}
private:
TextureCache& texture_cache_;
TextureKey key_;
texture_util::TextureGuestLayout guest_layout_;
uint64_t host_memory_usage_ = 0;
uint64_t last_usage_submission_index_;
uint64_t last_usage_time_;
Texture* used_previous_;
Texture* used_next_;
// Whether the most up-to-date base / mips contain pages with data from a
// resolve operation (rather than from the CPU or memexport), primarily for
// choosing between piecewise linear gamma and sRGB when the former is
// emulated with the latter.
bool base_resolved_;
bool mips_resolved_;
// These are to be accessed within the global critical region to synchronize
// with shared memory.
// Whether the recent base level data needs reloading from the memory.
bool base_outdated_ = false;
// Whether the recent mip data needs reloading from the memory.
bool mips_outdated_ = false;
// Watch handles for the memory ranges.
SharedMemory::WatchHandle base_watch_handle_ = nullptr;
SharedMemory::WatchHandle mips_watch_handle_ = nullptr;
};
// Rules of data access in load shaders:
// - Source reading (from the shared memory or the scaled resolve buffer):
// - Guest data may be stored in a sparsely-allocated buffer, or, in
// Direct3D 12 terms, a tiled buffer. This means that some regions of the
// buffer may not be mapped. On tiled resources tier 1 hardware, accessing
// unmapped tiles results in undefined behavior, including a GPU page
// fault and device removal. So, shaders must not try to access
// potentially unmapped regions (that are outside the texture memory
// extents calculated on the CPU, taking into account that Xenia can't
// overestimate texture sizes freely since it must not try to upload
// unallocated pages on the CPU).
// - Buffer tiles have 64 KB size on Direct3D 12. Vulkan has its own
// alignment requirements for sparse binding. But overall, we're
// allocating pretty large regions.
// - Resolution scaling disabled:
// - Shared memory allocates regions of power of two sizes that map
// directly to the same portions of the 512 MB of the console's
// physical memory. So, a 64 KB-aligned host buffer region is also 64
// KB-aligned in the guest address space.
// - Tiled textures: 32x32x4-block tiles are always resident each as a
// whole. If the width is bigger than the pitch, the overflowing 32x32x4
// tiles are also loaded as entire tiles. We do not have separate
// shaders for 2D and 3D. So, for tiled textures, it's safe to consider
// that if any location within a 32x32-aligned portion is within the
// texture bounds, the entire 32x32 portion also can be read.
// - Linear textures: Pitch is aligned to 256 bytes. Row count, however,
// is not aligned to anything (unless the mip tail is being loaded). The
// overflowing last row in case `width > pitch`, however, is made
// resident up to the last texel in it. But row start alignment is 256,
// which is a power of two, and is smaller than the Direct3D 12 tile
// size of 64 KB. So, if any block within a 256-aligned region is within
// the texture bounds, without resolution scaling, reading from any
// location in that 256-aligned region is safe.
// - Since we use the same shaders for tiled and linear textures (as well
// as 1D textures), this means that without resolution scaling, it's
// safe to access a min(256 bytes, 32 blocks)-aligned portion along X,
// but only within the same row of blocks, with bounds checking only for
// such portion as a whole, but without additional bounds checking
// inside of it.
// - Therefore, it's recommended that shaders read power-of-two amounts of
// blocks (so there will naturally be some alignment to some power of
// two), and this way, each thread may read at most 16 16bpb blocks, or
// at most 32 8bpb or smaller blocks, within a single `if (x < width)`
// check for the whole aligned range of the same length.
// - Resolution scaling enabled:
// - For simplicity, unlike in the shared memory, buffer tile boundaries
// are not aligned to powers of 2 the same way as guest addresses are.
// While for 2x2 resolution scaling it still happens to be the case
// because `host scaling unit address = guest scaling unit address << 2`
// (similarly for 2x1 and 1x2), for 3x or x3, it's not - a 64 KB host
// tile would represent 7281.777 guest bytes with 3x3 (disregarding that
// sequences of texels that are adjacent in memory along the horizontal
// axis, not individual bytes, are scaled, but even in that case it's
// still not scaling by 2^n).
// - The above would affect the `width > pitch` case for linear textures,
// requiring overestimating the width when calculating the range of the
// tiles to map, while not doing this overestimation on the guest memory
// extent calculation side (otherwise it may result in attempting to
// upload unallocated memory on the CPU). For example, let's take a look
// at an extreme case of a 369x28 k_8 texture with a pitch of 256 bytes.
// The overflowing tail of the last row, in guest memory, would be loaded
// from the [7168, 7281) range, or, with 3x3 resolution scaling, from
// bytes [64512, 65529). However, if we try to unconditionally load 2
// pixels, as if the texture was 370x28, we would be accessing the bytes
// [64512, 65538). But bytes 65536 and 65537 will be in another 64 KB
// tile, which may not be mapped yet. However, none of this is an issue
// for one simple reason - resolving is only possible to tiled textures,
// so linear textures will never be resolution-scaled.
// - Tiled textures have potentially referenced guest 32x32-block tiles
// loaded in their entirety. So, just like for unscaled textures, if any
// block within a tile is available, the entire tile is as well.
// - Destination writing (to the linear buffer):
// - host_x_blocks_per_thread specifies how many pixels can be written
// without bounds checking within increments of that amount - the pitch of
// the destination buffer is manually overaligned if needed.
// In textures, resolution scaling is done for 8-byte portions of memory for
// 8bpp textures, and for 16-byte portions for textures of higher bit depths
// (these are the sizes of regions where contiguous texels in memory are also
// contiguous in the texture along the horizontal axis, so 64-bit and 128-bit
// loads / stores, for 8bpp and 16bpp+ respectively, can be used for untiling
// regardless of the resolution scale).
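// A minimal illustrative sketch of the scaling unit mapping described above
// (this helper is not part of the cache's real interface; the name and
// signature are assumptions for illustration). Texels within a guest 8-byte
// (8bpp) or 16-byte (16bpp+) portion stay adjacent, and portion indices are
// multiplied by the scale area - so for 2x2 scaling, the host scaling unit
// address is the guest scaling unit address << 2, as noted above.
static constexpr uint32_t GetScaledPortionAddressSketch(
uint32_t guest_address, uint32_t portion_size_log2,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y) {
// Index of the 8- or 16-byte scaling unit within the guest address space.
uint32_t guest_portion_index = guest_address >> portion_size_log2;
// One guest portion expands into scale_x * scale_y host portions.
return (guest_portion_index * draw_resolution_scale_x *
draw_resolution_scale_y)
<< portion_size_log2;
}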
struct LoadConstants {
uint32_t is_tiled_3d_endian_scale;
// Base offset in bytes, resolution-scaled.
uint32_t guest_offset;
// For tiled textures - row pitch in blocks, aligned to 32, unscaled.
// For linear textures - row pitch in bytes.
uint32_t guest_pitch_aligned;
// For 3D textures only (ignored otherwise) - aligned to 32, unscaled.
uint32_t guest_z_stride_block_rows_aligned;
// - std140 vector boundary -
// If this is a packed mip tail, this is aligned to tile dimensions.
// Resolution-scaled.
uint32_t size_blocks[3];
// Base offset in bytes.
uint32_t host_offset;
// - std140 vector boundary -
uint32_t host_pitch;
uint32_t height_texels;
};
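// All LoadConstants members are uint32_t, so the structure is expected to
// have no implicit padding - a sanity check for the std140-style vector
// boundaries annotated above:
static_assert(sizeof(LoadConstants) == 10 * sizeof(uint32_t),
"LoadConstants must be exactly 10 32-bit words");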
static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
enum LoadShaderIndex {
kLoadShaderIndex8bpb,
kLoadShaderIndex16bpb,
kLoadShaderIndex32bpb,
kLoadShaderIndex64bpb,
kLoadShaderIndex128bpb,
kLoadShaderIndexR5G5B5A1ToB5G5R5A1,
kLoadShaderIndexR5G6B5ToB5G6R5,
kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle,
kLoadShaderIndexRGBA4ToBGRA4,
kLoadShaderIndexRGBA4ToARGB4,
kLoadShaderIndexGBGR8ToGRGB8,
kLoadShaderIndexGBGR8ToRGB8,
kLoadShaderIndexBGRG8ToRGBG8,
kLoadShaderIndexBGRG8ToRGB8,
kLoadShaderIndexR10G11B11ToRGBA16,
kLoadShaderIndexR10G11B11ToRGBA16SNorm,
kLoadShaderIndexR11G11B10ToRGBA16,
kLoadShaderIndexR11G11B10ToRGBA16SNorm,
kLoadShaderIndexR16UNormToFloat,
kLoadShaderIndexR16SNormToFloat,
kLoadShaderIndexRG16UNormToFloat,
kLoadShaderIndexRG16SNormToFloat,
kLoadShaderIndexRGBA16UNormToFloat,
kLoadShaderIndexRGBA16SNormToFloat,
kLoadShaderIndexDXT1ToRGBA8,
kLoadShaderIndexDXT3ToRGBA8,
kLoadShaderIndexDXT5ToRGBA8,
kLoadShaderIndexDXNToRG8,
kLoadShaderIndexDXT3A,
kLoadShaderIndexDXT3AAs1111ToBGRA4,
kLoadShaderIndexDXT3AAs1111ToARGB4,
kLoadShaderIndexDXT5AToR8,
kLoadShaderIndexCTX1,
kLoadShaderIndexDepthUnorm,
kLoadShaderIndexDepthFloat,
kLoadShaderCount,
kLoadShaderIndexUnknown = kLoadShaderCount,
};
struct LoadShaderInfo {
// Log2 of the sizes, in bytes, of the elements in the source (guest) and
// the destination (host) buffer bindings accessed by the copying shader,
// since the shader may copy multiple blocks per one invocation.
uint32_t source_bpe_log2;
uint32_t dest_bpe_log2;
// Number of bytes in a host resolution-scaled block (corresponding to a
// guest block if not decompressing, or a host texel if decompressing)
// written by the shader.
uint32_t bytes_per_host_block;
// Log2 of the number of guest resolution-scaled blocks along the X axis
// loaded by a single thread of the shader group.
uint32_t guest_x_blocks_per_thread_log2;
uint32_t GetGuestXBlocksPerGroupLog2() const {
return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2;
}
};
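// For example, with kLoadGuestXThreadsPerGroupLog2 = 2 (4 threads along X)
// and guest_x_blocks_per_thread_log2 = 2 (4 blocks per thread),
// GetGuestXBlocksPerGroupLog2() is 4 - one group covers 16 guest blocks
// along the X axis.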
static constexpr uint8_t kSwizzledSignsUnsigned =
uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
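// Multiplying the 2-bit TextureSign value by 0b01010101 broadcasts it into
// all four 2-bit component slots; for instance, a sign value of 1 in every
// component would be 1 * 0b01010101 = 0b01010101.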
struct TextureBinding {
TextureKey key;
// Destination swizzle merged with guest to host format swizzle.
uint32_t host_swizzle;
// Packed TextureSign values, 2 bits per component, with the guest-side
// destination swizzle from the fetch constant applied to them.
uint8_t swizzled_signs;
// Unsigned version of the texture (or signed if they have the same data).
Texture* texture;
// Signed version of the texture if the data in the signed version is
// different on the host.
Texture* texture_signed;
TextureBinding() { Reset(); }
void Reset() {
std::memset(this, 0, sizeof(*this));
host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_0000;
swizzled_signs = kSwizzledSignsUnsigned;
}
};
explicit TextureCache(const RegisterFile& register_file,
SharedMemory& shared_memory,
uint32_t draw_resolution_scale_x,
uint32_t draw_resolution_scale_y);
const RegisterFile& register_file() const { return register_file_; }
SharedMemory& shared_memory() const { return shared_memory_; }
// May be called for purposes like clearing the cache, as well as from the
// destructor of the implementation if, for instance, textures hold references
// to the implementation that are used in their destructors and would become
// invalid if the implementation were destroyed before the textures.
void DestroyAllTextures(bool from_destructor = false);
// Whether the signed version of the texture has a different representation on
// the host than its unsigned version (for example, if it's a fixed-point
// texture emulated with a larger host pixel format).
virtual bool IsSignedVersionSeparateForFormat(TextureKey key) const {
return false;
}
// Parameters like whether the texture is tiled and its dimensions are checked
// externally; the implementation should take only format-related parameters
// such as the format itself and the signedness into account.
virtual bool IsScaledResolveSupportedForFormat(TextureKey key) const {
return false;
}
// For formats with less than 4 components, implementations normally should
// replicate the last component into the non-existent ones, similar to what is
// done for unused components of operands in shaders by Microsoft's Xbox 360
// shader compiler (.xxxx, .xyyy, .xyzz, .xyzw).
// For DXT3A and DXT5A, RRRR swizzle is specified in:
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
// 4D5307E6 also expects replicated components in k_8 sprites.
// DXN is read as RG in 4D5307E6, but as RA in 415607E6.
// TODO(Triang3l): Find out the correct contents of unused texture components.
virtual uint32_t GetHostFormatSwizzle(TextureKey key) const = 0;
virtual uint32_t GetMaxHostTextureWidthHeight(
xenos::DataDimension dimension) const = 0;
virtual uint32_t GetMaxHostTextureDepthOrArraySize(
xenos::DataDimension dimension) const = 0;
// The texture must be created exactly with this key; modifying the key is not
// allowed. If the implementation doesn't support a texture with this key, or
// in case of a runtime failure, it should return nullptr.
virtual std::unique_ptr<Texture> CreateTexture(TextureKey key) = 0;
// Returns nullptr not only if the key is not supported, but also if the
// texture couldn't be created - when nullptr is returned, a recreation
// attempt should occasionally be made.
Texture* FindOrCreateTexture(TextureKey key);
static const LoadShaderInfo& GetLoadShaderInfo(
LoadShaderIndex load_shader_index) {
assert_true(load_shader_index < kLoadShaderCount);
return load_shader_info_[load_shader_index];
}
bool LoadTextureData(Texture& texture);
void LoadTexturesData(Texture** textures, uint32_t n_textures);
// Writes the texture data (for the base, the mips, or both - at least one
// must be requested) from the shared memory or the scaled resolve memory.
// The shared memory management is done outside this function; the
// implementation just needs to load the data into the texture object.
virtual bool LoadTextureDataFromResidentMemoryImpl(Texture& texture,
bool load_base,
bool load_mips) = 0;
// Converts a texture fetch constant to a texture key, normalizing and
// validating the values, or creating an invalid key, and also gets the
// post-guest-swizzle signedness.
static void BindingInfoFromFetchConstant(
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
uint8_t* swizzled_signs_out);
// Makes all texture bindings invalid. Requesting textures after calling this
// will also cause another attempt to create a texture, or to untile it, if
// there was an error.
void ResetTextureBindings(bool from_destructor = false);
const TextureBinding* GetValidTextureBinding(
uint32_t fetch_constant_index) const {
const TextureBinding& binding = texture_bindings_[fetch_constant_index];
return binding.key.is_valid ? &binding : nullptr;
}
// Called when something in a texture binding is changed for the
// implementation to update the internal dependencies of the binding.
virtual void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) {}
private:
void UpdateTexturesTotalHostMemoryUsage(uint64_t add, uint64_t subtract);
// Shared memory callback for texture data invalidation.
static void WatchCallback(const global_unique_lock_type& global_lock,
void* context, void* data, uint64_t argument,
bool invalidated_by_gpu);
// Checks if there are any pages that contain scaled resolve data within the
// range.
bool IsRangeScaledResolved(uint32_t start_unscaled, uint32_t length_unscaled);
// Global shared memory invalidation callback for invalidating scaled resolved
// texture data.
static void ScaledResolveGlobalWatchCallbackThunk(
const global_unique_lock_type& global_lock, void* context,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
void ScaledResolveGlobalWatchCallback(
const global_unique_lock_type& global_lock, uint32_t address_first,
uint32_t address_last, bool invalidated_by_gpu);
const RegisterFile& register_file_;
SharedMemory& shared_memory_;
uint32_t draw_resolution_scale_x_;
uint32_t draw_resolution_scale_y_;
divisors::MagicDiv draw_resolution_scale_x_divisor_;
divisors::MagicDiv draw_resolution_scale_y_divisor_;
static const LoadShaderInfo load_shader_info_[kLoadShaderCount];
xe::global_critical_region global_critical_region_;
// Bit vector storing whether each 4 KB physical memory page contains scaled
// resolve data. uint32_t rather than uint64_t because parts of it can be sent
// to shaders.
std::unique_ptr<uint32_t[]> scaled_resolve_pages_;
// Second level of the bit vector for faster rejection of non-scaled textures.
// >> 12 for 4 KB pages, >> 5 for uint32_t level 1 bits, >> 6 for uint64_t
// level 2 bits.
uint64_t scaled_resolve_pages_l2_[SharedMemory::kBufferSize >> (12 + 5 + 6)];
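// For example, with a 512 MB shared memory buffer: >> 12 gives 131072 pages,
// >> 5 gives 4096 level 1 uint32_t words, and >> 6 gives 64 level 2 uint64_t
// words - each level 2 bit thus covers a 128 KB range of physical memory.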
// Global watch for scaled resolve data invalidation.
SharedMemory::GlobalWatchHandle scaled_resolve_global_watch_handle_ = nullptr;
uint64_t current_submission_index_ = 0;
uint64_t current_submission_time_ = 0;
std::unordered_map<TextureKey, std::unique_ptr<Texture>, TextureKey::Hasher>
textures_;
uint64_t textures_total_host_memory_usage_ = 0;
Texture* texture_used_first_ = nullptr;
Texture* texture_used_last_ = nullptr;
// Whether a texture has become outdated (a memory watch has been triggered),
// in which case the textures must be rechecked for being outdated, regardless
// of whether the fetch constants have been changed.
std::atomic<bool> texture_became_outdated_{false};
std::array<TextureBinding, xenos::kTextureFetchConstantCount>
texture_bindings_;
// Bit vector with bits reset on fetch constant writes to avoid parsing fetch
// constants again and again.
uint32_t texture_bindings_in_sync_ = 0;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_TEXTURE_CACHE_H_