/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/texture_cache.h"
#include <algorithm>
#include <cstdint>
#include <utility>
#include "xenia/base/assert.h"
#include "xenia/base/clock.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/texture_util.h"
#include "xenia/gpu/xenos.h"
DEFINE_int32(
draw_resolution_scale_x, 1,
"Integer pixel width scale used for scaling the rendering resolution "
"opaquely to the game.\n"
"1, 2 and 3 may be supported, but support of anything above 1 depends on "
"the device properties, such as whether it supports sparse binding / tiled "
"resources, the number of virtual address bits per resource, and other "
"factors.\n"
"Various effects and parts of game rendering pipelines may work "
"incorrectly as pixels become ambiguous from the game's perspective and "
"because half-pixel offset (which normally doesn't affect coverage when "
"MSAA isn't used) becomes full-pixel.",
"GPU");
DEFINE_int32(
draw_resolution_scale_y, 1,
"Integer pixel width scale used for scaling the rendering resolution "
"opaquely to the game.\n"
"See draw_resolution_scale_x for more information.",
"GPU");
DEFINE_uint32(
texture_cache_memory_limit_soft, 384,
"Maximum host texture memory usage (in megabytes) above which old textures "
"will be destroyed.",
"GPU");
DEFINE_uint32(
texture_cache_memory_limit_soft_lifetime, 30,
"Seconds a texture should be unused to be considered old enough to be "
"deleted if texture memory usage exceeds texture_cache_memory_limit_soft.",
"GPU");
DEFINE_uint32(
texture_cache_memory_limit_hard, 768,
"Maximum host texture memory usage (in megabytes) above which textures "
"will be destroyed as soon as possible.",
"GPU");
DEFINE_uint32(
texture_cache_memory_limit_render_to_texture, 24,
"Part of the host texture memory budget (in megabytes) that will be scaled "
"by the current drawing resolution scale.\n"
"If texture_cache_memory_limit_soft, for instance, is 384, and this is 24, "
"it will be assumed that the game will be using roughly 24 MB of "
"render-to-texture (resolve) targets and 384 - 24 = 360 MB of regular "
"textures - so with 2x2 resolution scaling, the soft limit will be 360 + "
"96 MB, and with 3x3, it will be 360 + 216 MB.",
"GPU");
namespace xe {
namespace gpu {
const TextureCache::LoadShaderInfo
TextureCache::load_shader_info_[kLoadShaderCount] = {
// k8bpb
{3, 4, 1, 4},
// k16bpb
{4, 4, 2, 4},
// k32bpb
{4, 4, 4, 3},
// k64bpb
{4, 4, 8, 2},
// k128bpb
{4, 4, 16, 1},
// kR5G5B5A1ToB5G5R5A1
{4, 4, 2, 4},
// kR5G6B5ToB5G6R5
{4, 4, 2, 4},
// kR5G5B6ToB5G6R5WithRBGASwizzle
{4, 4, 2, 4},
// kRGBA4ToBGRA4
{4, 4, 2, 4},
// kRGBA4ToARGB4
{4, 4, 2, 4},
// kGBGR8ToGRGB8
{4, 4, 4, 3},
// kGBGR8ToRGB8
{4, 4, 8, 3},
// kBGRG8ToRGBG8
{4, 4, 4, 3},
// kBGRG8ToRGB8
{4, 4, 8, 3},
// kR10G11B11ToRGBA16
{4, 4, 8, 3},
// kR10G11B11ToRGBA16SNorm
{4, 4, 8, 3},
// kR11G11B10ToRGBA16
{4, 4, 8, 3},
// kR11G11B10ToRGBA16SNorm
{4, 4, 8, 3},
// kR16UNormToFloat
{4, 4, 2, 4},
// kR16SNormToFloat
{4, 4, 2, 4},
// kRG16UNormToFloat
{4, 4, 4, 3},
// kRG16SNormToFloat
{4, 4, 4, 3},
// kRGBA16UNormToFloat
{4, 4, 8, 2},
// kRGBA16SNormToFloat
{4, 4, 8, 2},
// kDXT1ToRGBA8
{4, 4, 4, 2},
// kDXT3ToRGBA8
{4, 4, 4, 1},
// kDXT5ToRGBA8
{4, 4, 4, 1},
// kDXNToRG8
{4, 4, 2, 1},
// kDXT3A
{4, 4, 1, 2},
// kDXT3AAs1111ToBGRA4
{4, 4, 2, 2},
// kDXT3AAs1111ToARGB4
{4, 4, 2, 2},
// kDXT5AToR8
{4, 4, 1, 2},
// kCTX1
{4, 4, 2, 2},
// kDepthUnorm
{4, 4, 4, 3},
// kDepthFloat
{4, 4, 4, 3},
};
TextureCache::TextureCache(const RegisterFile& register_file,
SharedMemory& shared_memory,
uint32_t draw_resolution_scale_x,
uint32_t draw_resolution_scale_y)
: register_file_(register_file),
shared_memory_(shared_memory),
draw_resolution_scale_x_(draw_resolution_scale_x),
draw_resolution_scale_y_(draw_resolution_scale_y) {
assert_true(draw_resolution_scale_x >= 1);
assert_true(draw_resolution_scale_x <= kMaxDrawResolutionScaleAlongAxis);
assert_true(draw_resolution_scale_y >= 1);
assert_true(draw_resolution_scale_y <= kMaxDrawResolutionScaleAlongAxis);
if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
constexpr uint32_t kScaledResolvePageDwordCount =
SharedMemory::kBufferSize / 4096 / 32;
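// For illustration (assuming the 512 MiB shared memory space implied by the
// 0x1FFFFFFF masking in MarkRangeAsResolved): 0x20000000 bytes / 4096 bytes
// per page / 32 pages per dword = 4096 uint32_t entries (16 KiB) in the
// level-1 bitmap, and 4096 / 64 = 64 uint64_t entries in the level-2 bitmap.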
scaled_resolve_pages_ =
std::unique_ptr<uint32_t[]>(new uint32_t[kScaledResolvePageDwordCount]);
std::memset(scaled_resolve_pages_.get(), 0,
kScaledResolvePageDwordCount * sizeof(uint32_t));
std::memset(scaled_resolve_pages_l2_, 0, sizeof(scaled_resolve_pages_l2_));
scaled_resolve_global_watch_handle_ = shared_memory.RegisterGlobalWatch(
ScaledResolveGlobalWatchCallbackThunk, this);
}
}
TextureCache::~TextureCache() {
DestroyAllTextures(true);
if (scaled_resolve_global_watch_handle_) {
shared_memory().UnregisterGlobalWatch(scaled_resolve_global_watch_handle_);
}
}
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
uint32_t& y_out) {
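// Writes the clamped scale to x_out and y_out, and returns whether the values
// from the config could be used without clamping to
// kMaxDrawResolutionScaleAlongAxis.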
uint32_t config_x =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
uint32_t config_y =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
x_out = clamped_x;
y_out = clamped_y;
return clamped_x == config_x && clamped_y == config_y;
}
void TextureCache::ClearCache() { DestroyAllTextures(); }
void TextureCache::CompletedSubmissionUpdated(
uint64_t completed_submission_index) {
// If memory usage is too high, destroy unused textures.
uint64_t current_time = xe::Clock::QueryHostUptimeMillis();
// texture_cache_memory_limit_render_to_texture is assumed to be included in
// texture_cache_memory_limit_soft and texture_cache_memory_limit_hard at 1x
// scale, so 1 is subtracted from the scale factor product.
uint32_t limit_scaled_resolve_add_mb =
cvars::texture_cache_memory_limit_render_to_texture *
(draw_resolution_scale_x() * draw_resolution_scale_y() - 1);
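// For example, with the default cvar values (24 MB render-to-texture, 384 MB
// soft, 768 MB hard) and 2x2 resolution scaling, 24 * (2 * 2 - 1) = 72 MB is
// added, giving a 456 MB soft limit and an 840 MB hard limit.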
uint32_t limit_soft_mb =
cvars::texture_cache_memory_limit_soft + limit_scaled_resolve_add_mb;
uint32_t limit_hard_mb =
cvars::texture_cache_memory_limit_hard + limit_scaled_resolve_add_mb;
uint32_t limit_soft_lifetime =
cvars::texture_cache_memory_limit_soft_lifetime * 1000;
bool destroyed_any = false;
while (texture_used_first_ != nullptr) {
uint64_t total_host_memory_usage_mb =
(textures_total_host_memory_usage_ + ((UINT32_C(1) << 20) - 1)) >> 20;
bool limit_hard_exceeded = total_host_memory_usage_mb > limit_hard_mb;
if (total_host_memory_usage_mb <= limit_soft_mb && !limit_hard_exceeded) {
break;
}
Texture* texture = texture_used_first_;
if (texture->last_usage_submission_index() > completed_submission_index) {
break;
}
if (!limit_hard_exceeded &&
(texture->last_usage_time() + limit_soft_lifetime) > current_time) {
break;
}
if (!destroyed_any) {
destroyed_any = true;
// The texture being destroyed might have been bound in the previous
// submissions, and nothing has overwritten the binding yet, so completion
// of the submission where the texture was last actually used on the GPU
// doesn't imply that it's not bound currently. Reset bindings if
// any texture has been destroyed.
ResetTextureBindings();
}
// Remove the texture from the map and destroy it via its unique_ptr.
auto found_texture_it = textures_.find(texture->key());
assert_true(found_texture_it != textures_.end());
if (found_texture_it != textures_.end()) {
assert_true(found_texture_it->second.get() == texture);
textures_.erase(found_texture_it);
// `texture` is invalid now.
}
}
if (destroyed_any) {
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
}
}
void TextureCache::BeginSubmission(uint64_t new_submission_index) {
assert_true(new_submission_index > current_submission_index_);
current_submission_index_ = new_submission_index;
current_submission_time_ = xe::Clock::QueryHostUptimeMillis();
}
void TextureCache::BeginFrame() {
// In case there was a failure to create something in the previous frame, make
// sure bindings are reset so a new attempt will surely be made if the texture
// is requested again.
ResetTextureBindings();
}
void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled,
uint32_t length_unscaled) {
if (length_unscaled == 0) {
return;
}
start_unscaled &= 0x1FFFFFFF;
length_unscaled = std::min(length_unscaled, 0x20000000 - start_unscaled);
if (IsDrawResolutionScaled()) {
uint32_t page_first = start_unscaled >> 12;
uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
uint32_t block_first = page_first >> 5;
uint32_t block_last = page_last >> 5;
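// An illustrative example: start_unscaled = 0x12345 with length_unscaled =
// 0x3000 covers 4 KiB pages 0x12 through 0x15, which are bits 18..21 of
// scaled_resolve_pages_[0] (32 pages per dword).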
auto global_lock = global_critical_region_.Acquire();
for (uint32_t i = block_first; i <= block_last; ++i) {
uint32_t add_bits = UINT32_MAX;
if (i == block_first) {
add_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
}
if (i == block_last && (page_last & 31) != 31) {
add_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
}
scaled_resolve_pages_[i] |= add_bits;
scaled_resolve_pages_l2_[i >> 6] |= UINT64_C(1) << (i & 63);
}
}
// Invalidate textures. Toggling individual textures between scaled and
// unscaled also relies on invalidation through shared memory.
shared_memory().RangeWrittenByGpu(start_unscaled, length_unscaled, true);
}
uint32_t TextureCache::GuestToHostSwizzle(uint32_t guest_swizzle,
uint32_t host_format_swizzle) {
uint32_t host_swizzle = 0;
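// Both swizzles are packed as 4 components x 3 bits, with component i in bits
// [3 * i + 2 : 3 * i]. For instance, if component i of guest_swizzle is 2 (Z),
// the host component is taken from bits [8:6] of host_format_swizzle; constant
// selectors (4 = 0, 5 = 1) are passed through as is.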
for (uint32_t i = 0; i < 4; ++i) {
uint32_t guest_swizzle_component = (guest_swizzle >> (3 * i)) & 0b111;
uint32_t host_swizzle_component;
if (guest_swizzle_component >= xenos::XE_GPU_TEXTURE_SWIZZLE_0) {
// Get rid of 6 and 7 values (to prevent host GPU errors if the game has
// something broken) the simple way - by changing them to 4 (0) and 5 (1).
host_swizzle_component = guest_swizzle_component & 0b101;
} else {
host_swizzle_component =
(host_format_swizzle >> (3 * guest_swizzle_component)) & 0b111;
}
host_swizzle |= host_swizzle_component << (3 * i);
}
return host_swizzle;
}
void TextureCache::RequestTextures(uint32_t used_texture_mask) {
const auto& regs = register_file();
if (texture_became_outdated_.exchange(false, std::memory_order_acquire)) {
// A texture has become outdated - make sure outdatedness is rechecked in
// this draw and in subsequent ones so the new data is reloaded if needed.
ResetTextureBindings();
}
// Update the texture keys and the textures.
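// Each bit of used_texture_mask corresponds to one of the 32 texture fetch
// constants; bindings whose bit is already set in texture_bindings_in_sync_
// have been brought up to date since the last binding reset and are skipped.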
uint32_t bindings_changed = 0;
uint32_t textures_remaining = used_texture_mask & ~texture_bindings_in_sync_;
uint32_t index = 0;
while (xe::bit_scan_forward(textures_remaining, &index)) {
uint32_t index_bit = UINT32_C(1) << index;
textures_remaining &= ~index_bit;
TextureBinding& binding = texture_bindings_[index];
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
TextureKey old_key = binding.key;
uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);
texture_bindings_in_sync_ |= index_bit;
if (!binding.key.is_valid) {
if (old_key.is_valid) {
bindings_changed |= index_bit;
}
binding.Reset();
continue;
}
uint32_t old_host_swizzle = binding.host_swizzle;
binding.host_swizzle =
GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(binding.key));
// Check whether the unsigned and the signed versions of the texture need to
// be loaded (when the format is emulated with different host bit
// representations for signed and unsigned - otherwise only the unsigned one
// is loaded).
bool key_changed = binding.key != old_key;
bool any_sign_was_not_signed =
texture_util::IsAnySignNotSigned(old_swizzled_signs);
bool any_sign_was_signed =
texture_util::IsAnySignSigned(old_swizzled_signs);
bool any_sign_is_not_signed =
texture_util::IsAnySignNotSigned(binding.swizzled_signs);
bool any_sign_is_signed =
texture_util::IsAnySignSigned(binding.swizzled_signs);
if (key_changed || binding.host_swizzle != old_host_swizzle ||
any_sign_is_not_signed != any_sign_was_not_signed ||
any_sign_is_signed != any_sign_was_signed) {
bindings_changed |= index_bit;
}
bool load_unsigned_data = false, load_signed_data = false;
if (IsSignedVersionSeparateForFormat(binding.key)) {
// Can reuse previously loaded unsigned/signed versions if the key is the
// same and the texture was previously bound as unsigned/signed
// respectively (checking the previous values of signedness rather than
// binding.texture != nullptr and binding.texture_signed != nullptr also
// prevents repeated attempts to load the texture if it has failed to
// load).
if (any_sign_is_not_signed) {
if (key_changed || !any_sign_was_not_signed) {
binding.texture = FindOrCreateTexture(binding.key);
load_unsigned_data = true;
}
} else {
binding.texture = nullptr;
}
if (any_sign_is_signed) {
if (key_changed || !any_sign_was_signed) {
TextureKey signed_key = binding.key;
signed_key.signed_separate = 1;
binding.texture_signed = FindOrCreateTexture(signed_key);
load_signed_data = true;
}
} else {
binding.texture_signed = nullptr;
}
} else {
// Same resource for both unsigned and signed, but descriptor formats may
// be different.
if (key_changed) {
binding.texture = FindOrCreateTexture(binding.key);
load_unsigned_data = true;
}
binding.texture_signed = nullptr;
}
if (load_unsigned_data && binding.texture != nullptr) {
LoadTextureData(*binding.texture);
}
if (load_signed_data && binding.texture_signed != nullptr) {
LoadTextureData(*binding.texture_signed);
}
}
if (bindings_changed) {
UpdateTextureBindingsImpl(bindings_changed);
}
}
const char* TextureCache::TextureKey::GetLogDimensionName(
xenos::DataDimension dimension) {
switch (dimension) {
case xenos::DataDimension::k1D:
return "1D";
case xenos::DataDimension::k2DOrStacked:
return "2D";
case xenos::DataDimension::k3D:
return "3D";
case xenos::DataDimension::kCube:
return "cube";
default:
assert_unhandled_case(dimension);
return "unknown";
}
}
void TextureCache::TextureKey::LogAction(const char* action) const {
XELOGGPU(
"{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
"base at 0x{:08X} (pitch {}), mips at 0x{:08X}",
action, tiled ? "tiled" : "linear", scaled_resolve ? "scaled " : "",
GetWidth(), GetHeight(), GetDepthOrArraySize(), GetLogDimensionName(),
FormatInfo::Get(format)->name, mip_max_level + 1, packed_mips ? "" : "un",
mip_max_level != 0 ? "s" : "", base_page << 12, pitch << 5,
mip_page << 12);
}
void TextureCache::Texture::LogAction(const char* action) const {
XELOGGPU(
"{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
"base at 0x{:08X} (pitch {}, size 0x{:08X}), mips at 0x{:08X} (size "
"0x{:08X})",
action, key_.tiled ? "tiled" : "linear",
key_.scaled_resolve ? "scaled " : "", key_.GetWidth(), key_.GetHeight(),
key_.GetDepthOrArraySize(), key_.GetLogDimensionName(),
FormatInfo::Get(key_.format)->name, key_.mip_max_level + 1,
key_.packed_mips ? "" : "un", key_.mip_max_level != 0 ? "s" : "",
key_.base_page << 12, key_.pitch << 5, GetGuestBaseSize(),
key_.mip_page << 12, GetGuestMipsSize());
}
// The texture must be in the recent usage list. Place it in front now because
// after creation, the texture will likely be used immediately, and it should
// not be destroyed immediately after creation if dropping of old textures is
// performed somehow. The list is maintained by the Texture, not the
// TextureCache itself (unlike the `textures_` container).
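// texture_used_first_ is the least recently used texture (the eviction
// candidate checked in CompletedSubmissionUpdated), texture_used_last_ is the
// most recently used one; a newly created texture is appended at the "last"
// end.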
TextureCache::Texture::Texture(TextureCache& texture_cache,
const TextureKey& key)
: texture_cache_(texture_cache),
key_(key),
guest_layout_(key.GetGuestLayout()),
base_resolved_(key.scaled_resolve),
mips_resolved_(key.scaled_resolve),
last_usage_submission_index_(texture_cache.current_submission_index_),
last_usage_time_(texture_cache.current_submission_time_),
used_previous_(texture_cache.texture_used_last_),
used_next_(nullptr) {
if (texture_cache.texture_used_last_) {
texture_cache.texture_used_last_->used_next_ = this;
} else {
texture_cache.texture_used_first_ = this;
}
texture_cache.texture_used_last_ = this;
// Never try to upload data that doesn't exist.
base_outdated_ = guest_layout().base.level_data_extent_bytes != 0;
mips_outdated_ = guest_layout().mips_total_extent_bytes != 0;
}
TextureCache::Texture::~Texture() {
if (mips_watch_handle_) {
texture_cache().shared_memory().UnwatchMemoryRange(mips_watch_handle_);
}
if (base_watch_handle_) {
texture_cache().shared_memory().UnwatchMemoryRange(base_watch_handle_);
}
if (used_previous_) {
used_previous_->used_next_ = used_next_;
} else {
texture_cache_.texture_used_first_ = used_next_;
}
if (used_next_) {
used_next_->used_previous_ = used_previous_;
} else {
texture_cache_.texture_used_last_ = used_previous_;
}
texture_cache_.UpdateTexturesTotalHostMemoryUsage(0, host_memory_usage_);
}
void TextureCache::Texture::MakeUpToDateAndWatch(
const std::unique_lock<std::recursive_mutex>& global_lock) {
SharedMemory& shared_memory = texture_cache().shared_memory();
if (base_outdated_) {
assert_not_zero(GetGuestBaseSize());
base_outdated_ = false;
base_watch_handle_ = shared_memory.WatchMemoryRange(
key().base_page << 12, GetGuestBaseSize(), TextureCache::WatchCallback,
this, nullptr, 0);
}
if (mips_outdated_) {
assert_not_zero(GetGuestMipsSize());
mips_outdated_ = false;
mips_watch_handle_ = shared_memory.WatchMemoryRange(
key().mip_page << 12, GetGuestMipsSize(), TextureCache::WatchCallback,
this, nullptr, 1);
}
}
void TextureCache::Texture::MarkAsUsed() {
assert_true(last_usage_submission_index_ <=
texture_cache_.current_submission_index_);
// This is called very frequently; skip relinking if the texture has already
// been marked as used in the current submission.
if (last_usage_submission_index_ >=
texture_cache_.current_submission_index_) {
return;
}
last_usage_submission_index_ = texture_cache_.current_submission_index_;
last_usage_time_ = texture_cache_.current_submission_time_;
if (used_next_ == nullptr) {
// Already the most recently used.
return;
}
if (used_previous_ != nullptr) {
used_previous_->used_next_ = used_next_;
} else {
texture_cache_.texture_used_first_ = used_next_;
}
used_next_->used_previous_ = used_previous_;
used_previous_ = texture_cache_.texture_used_last_;
used_next_ = nullptr;
texture_cache_.texture_used_last_->used_next_ = this;
texture_cache_.texture_used_last_ = this;
}
void TextureCache::Texture::WatchCallback(
[[maybe_unused]] const std::unique_lock<std::recursive_mutex>& global_lock,
bool is_mip) {
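// Called with the global lock held when the watched guest memory range is
// written. The watch is assumed to be consumed once it has fired, so only the
// outdated flag is set here; MakeUpToDateAndWatch re-registers the watch after
// the data has been reloaded.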
if (is_mip) {
assert_not_zero(GetGuestMipsSize());
mips_outdated_ = true;
mips_watch_handle_ = nullptr;
} else {
assert_not_zero(GetGuestBaseSize());
base_outdated_ = true;
base_watch_handle_ = nullptr;
}
}
void TextureCache::WatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
void* data, uint64_t argument, bool invalidated_by_gpu) {
Texture& texture = *static_cast<Texture*>(context);
texture.WatchCallback(global_lock, argument != 0);
texture.texture_cache().texture_became_outdated_.store(
true, std::memory_order_release);
}
void TextureCache::DestroyAllTextures(bool from_destructor) {
ResetTextureBindings(from_destructor);
textures_.clear();
COUNT_profile_set("gpu/texture_cache/textures", 0);
}
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
// Check if the texture is a scaled resolve texture.
if (IsDrawResolutionScaled() && key.tiled &&
IsScaledResolveSupportedForFormat(key)) {
texture_util::TextureGuestLayout scaled_resolve_guest_layout =
key.GetGuestLayout();
if ((scaled_resolve_guest_layout.base.level_data_extent_bytes &&
IsRangeScaledResolved(
key.base_page << 12,
scaled_resolve_guest_layout.base.level_data_extent_bytes)) ||
(scaled_resolve_guest_layout.mips_total_extent_bytes &&
IsRangeScaledResolved(
key.mip_page << 12,
scaled_resolve_guest_layout.mips_total_extent_bytes))) {
key.scaled_resolve = 1;
}
}
uint32_t host_width = key.GetWidth();
uint32_t host_height = key.GetHeight();
if (key.scaled_resolve) {
host_width *= draw_resolution_scale_x();
host_height *= draw_resolution_scale_y();
}
// With 3x resolution scaling, a 2D texture may become bigger than the
// Direct3D 11 limit, and with 2x, a 3D one as well.
// TODO(Triang3l): Skip mips on Vulkan in this case - the minimum requirement
// there is 4096, which is below the Xenos maximum texture size of 8192.
uint32_t max_host_width_height = GetMaxHostTextureWidthHeight(key.dimension);
uint32_t max_host_depth_or_array_size =
GetMaxHostTextureDepthOrArraySize(key.dimension);
if (host_width > max_host_width_height ||
host_height > max_host_width_height ||
key.GetDepthOrArraySize() > max_host_depth_or_array_size) {
return nullptr;
}
// Try to find an existing texture.
// TODO(Triang3l): Reuse a texture with mip_page unchanged, but base_page
// previously 0, now not 0, to save memory - common case in streaming.
auto found_texture_it = textures_.find(key);
if (found_texture_it != textures_.end()) {
return found_texture_it->second.get();
}
// Create the texture and add it to the map.
Texture* texture;
{
std::unique_ptr<Texture> new_texture = CreateTexture(key);
if (!new_texture) {
key.LogAction("Failed to create");
return nullptr;
}
assert_true(new_texture->key() == key);
texture =
textures_.emplace(key, std::move(new_texture)).first->second.get();
}
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
texture->LogAction("Created");
return texture;
}
bool TextureCache::LoadTextureData(Texture& texture) {
// Check what needs to be uploaded.
bool base_outdated, mips_outdated;
{
auto global_lock = global_critical_region_.Acquire();
base_outdated = texture.base_outdated(global_lock);
mips_outdated = texture.mips_outdated(global_lock);
}
if (!base_outdated && !mips_outdated) {
return true;
}
TextureKey texture_key = texture.key();
// Request uploading of the texture data to the shared memory.
// This is also necessary when resolution scaling is used - the texture cache
// relies on shared memory for invalidation of both unscaled and scaled
// textures. Also, a texture may be partially unscaled when only a portion of
// its pages is invalidated; in that case, the copy in the shared memory is
// needed to load the unscaled parts.
// TODO(Triang3l): Load unscaled parts.
bool base_resolved = texture.GetBaseResolved();
if (base_outdated) {
if (!shared_memory().RequestRange(
texture_key.base_page << 12, texture.GetGuestBaseSize(),
texture_key.scaled_resolve ? nullptr : &base_resolved)) {
return false;
}
}
bool mips_resolved = texture.GetMipsResolved();
if (mips_outdated) {
if (!shared_memory().RequestRange(
texture_key.mip_page << 12, texture.GetGuestMipsSize(),
texture_key.scaled_resolve ? nullptr : &mips_resolved)) {
return false;
}
}
if (texture_key.scaled_resolve) {
// Make sure all the scaled resolve memory is resident and accessible from
// the shader, including any possible padding that hasn't yet been touched
// by an actual resolve, but is still included in the texture size, so the
// GPU won't be trying to access unmapped memory.
if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12,
texture.GetGuestBaseSize())) {
return false;
}
if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12,
texture.GetGuestMipsSize())) {
return false;
}
}
// Actually load the texture data.
if (!LoadTextureDataFromResidentMemoryImpl(texture, base_outdated,
mips_outdated)) {
return false;
}
// Update the source of the texture (resolve vs. CPU or memexport) for
// purposes of handling piecewise gamma emulation via sRGB and for resolution
// scale in sampling offsets.
if (!texture_key.scaled_resolve) {
texture.SetBaseResolved(base_resolved);
texture.SetMipsResolved(mips_resolved);
}
// Mark the ranges as uploaded and watch them. This is needed for scaled
// resolves as well to detect when the CPU wants to reuse the memory for a
// regular texture or a vertex buffer, and thus the scaled resolve version is
// not up to date anymore.
texture.MakeUpToDateAndWatch(global_critical_region_.Acquire());
texture.LogAction("Loaded");
return true;
}
void TextureCache::BindingInfoFromFetchConstant(
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
uint8_t* swizzled_signs_out) {
// Reset the key and the signedness.
key_out.MakeInvalid();
if (swizzled_signs_out != nullptr) {
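// Replicate the 2-bit kUnsigned value into all four 2-bit component slots of
// the swizzled signs byte (multiplying any 2-bit value by 0b01010101
// broadcasts it across the byte).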
*swizzled_signs_out =
uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
}
switch (fetch.type) {
case xenos::FetchConstantType::kTexture:
break;
case xenos::FetchConstantType::kInvalidTexture:
if (cvars::gpu_allow_invalid_fetch_constants) {
break;
}
XELOGW(
"Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
"has \"invalid\" type! This is incorrect behavior, but you can try "
"bypassing this by launching Xenia with "
"--gpu_allow_invalid_fetch_constants=true.",
fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
fetch.dword_4, fetch.dword_5);
return;
default:
XELOGW(
"Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
"is completely invalid!",
fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
fetch.dword_4, fetch.dword_5);
return;
}
uint32_t width_minus_1, height_minus_1, depth_or_array_size_minus_1;
uint32_t base_page, mip_page, mip_max_level;
texture_util::GetSubresourcesFromFetchConstant(
fetch, &width_minus_1, &height_minus_1, &depth_or_array_size_minus_1,
&base_page, &mip_page, nullptr, &mip_max_level);
if (base_page == 0 && mip_page == 0) {
// No texture data at all.
return;
}
if (fetch.dimension == xenos::DataDimension::k1D) {
bool is_invalid_1d = false;
// TODO(Triang3l): Support long 1D textures.
if (width_minus_1 >= xenos::kTexture2DCubeMaxWidthHeight) {
XELOGE(
"1D texture is too wide ({}) - ignoring! Report the game to Xenia "
"developers",
width_minus_1 + 1);
is_invalid_1d = true;
}
assert_false(fetch.tiled);
if (fetch.tiled) {
XELOGE(
"1D texture has tiling enabled in the fetch constant, but this "
"appears to be completely wrong - ignoring! Report the game to Xenia "
"developers");
is_invalid_1d = true;
}
assert_false(fetch.packed_mips);
if (fetch.packed_mips) {
XELOGE(
"1D texture has packed mips enabled in the fetch constant, but this "
"appears to be completely wrong - ignoring! Report the game to Xenia "
"developers");
is_invalid_1d = true;
}
if (is_invalid_1d) {
return;
}
}
xenos::TextureFormat format = GetBaseFormat(fetch.format);
key_out.base_page = base_page;
key_out.mip_page = mip_page;
key_out.dimension = fetch.dimension;
key_out.width_minus_1 = width_minus_1;
key_out.height_minus_1 = height_minus_1;
key_out.depth_or_array_size_minus_1 = depth_or_array_size_minus_1;
key_out.pitch = fetch.pitch;
key_out.mip_max_level = mip_max_level;
key_out.tiled = fetch.tiled;
key_out.packed_mips = fetch.packed_mips;
key_out.format = format;
key_out.endianness = fetch.endianness;
key_out.is_valid = 1;
if (swizzled_signs_out != nullptr) {
*swizzled_signs_out = texture_util::SwizzleSigns(fetch);
}
}
void TextureCache::ResetTextureBindings(bool from_destructor) {
uint32_t bindings_reset = 0;
for (size_t i = 0; i < texture_bindings_.size(); ++i) {
TextureBinding& binding = texture_bindings_[i];
if (!binding.key.is_valid) {
continue;
}
binding.Reset();
bindings_reset |= UINT32_C(1) << i;
}
texture_bindings_in_sync_ &= ~bindings_reset;
if (!from_destructor && bindings_reset) {
UpdateTextureBindingsImpl(bindings_reset);
}
}
void TextureCache::UpdateTexturesTotalHostMemoryUsage(uint64_t add,
uint64_t subtract) {
textures_total_host_memory_usage_ =
textures_total_host_memory_usage_ - subtract + add;
COUNT_profile_set("gpu/texture_cache/total_host_memory_usage_mb",
uint32_t((textures_total_host_memory_usage_ +
((UINT32_C(1) << 20) - 1)) >>
20));
}
bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
uint32_t length_unscaled) {
if (!IsDrawResolutionScaled()) {
return false;
}
start_unscaled = std::min(start_unscaled, SharedMemory::kBufferSize);
length_unscaled =
std::min(length_unscaled, SharedMemory::kBufferSize - start_unscaled);
if (!length_unscaled) {
return false;
}
// Two-level check for faster rejection since resolve targets are usually
// placed in relatively small and localized memory portions (confirmed by
// testing - pretty much all times the deeper level was entered, the texture
// was a resolve target).
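// Granularity of the lookup: one bit of scaled_resolve_pages_ per 4 KiB page,
// 32 pages (128 KiB) per level-1 dword, and 64 level-1 dwords (8 MiB) per
// level-2 bit, so an entire 8 MiB region can be rejected with a single bit
// test.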
uint32_t page_first = start_unscaled >> 12;
uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
uint32_t block_first = page_first >> 5;
uint32_t block_last = page_last >> 5;
uint32_t l2_block_first = block_first >> 6;
uint32_t l2_block_last = block_last >> 6;
auto global_lock = global_critical_region_.Acquire();
for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
uint64_t l2_block = scaled_resolve_pages_l2_[i];
if (i == l2_block_first) {
l2_block &= ~((UINT64_C(1) << (block_first & 63)) - 1);
}
if (i == l2_block_last && (block_last & 63) != 63) {
l2_block &= (UINT64_C(1) << ((block_last & 63) + 1)) - 1;
}
uint32_t block_relative_index;
while (xe::bit_scan_forward(l2_block, &block_relative_index)) {
l2_block &= ~(UINT64_C(1) << block_relative_index);
uint32_t block_index = (i << 6) + block_relative_index;
uint32_t check_bits = UINT32_MAX;
if (block_index == block_first) {
check_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
}
if (block_index == block_last && (page_last & 31) != 31) {
check_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
}
if (scaled_resolve_pages_[block_index] & check_bits) {
return true;
}
}
}
return false;
}
void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
texture_cache->ScaledResolveGlobalWatchCallback(
global_lock, address_first, address_last, invalidated_by_gpu);
}
void TextureCache::ScaledResolveGlobalWatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
assert_true(IsDrawResolutionScaled());
if (invalidated_by_gpu) {
// Resolves themselves do exactly the opposite of what this should do.
return;
}
// Mark scaled resolve ranges as non-scaled. Textures themselves will be
// invalidated by their shared memory watches.
uint32_t resolve_page_first = address_first >> 12;
uint32_t resolve_page_last = address_last >> 12;
uint32_t resolve_block_first = resolve_page_first >> 5;
uint32_t resolve_block_last = resolve_page_last >> 5;
uint32_t resolve_l2_block_first = resolve_block_first >> 6;
uint32_t resolve_l2_block_last = resolve_block_last >> 6;
for (uint32_t i = resolve_l2_block_first; i <= resolve_l2_block_last; ++i) {
uint64_t resolve_l2_block = scaled_resolve_pages_l2_[i];
uint32_t resolve_block_relative_index;
while (
xe::bit_scan_forward(resolve_l2_block, &resolve_block_relative_index)) {
resolve_l2_block &= ~(UINT64_C(1) << resolve_block_relative_index);
uint32_t resolve_block_index = (i << 6) + resolve_block_relative_index;
uint32_t resolve_keep_bits = 0;
if (resolve_block_index == resolve_block_first) {
resolve_keep_bits |= (UINT32_C(1) << (resolve_page_first & 31)) - 1;
}
if (resolve_block_index == resolve_block_last &&
(resolve_page_last & 31) != 31) {
resolve_keep_bits |=
~((UINT32_C(1) << ((resolve_page_last & 31) + 1)) - 1);
}
scaled_resolve_pages_[resolve_block_index] &= resolve_keep_bits;
if (scaled_resolve_pages_[resolve_block_index] == 0) {
scaled_resolve_pages_l2_[i] &=
~(UINT64_C(1) << resolve_block_relative_index);
}
}
}
}
} // namespace gpu
} // namespace xe