/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/texture_cache.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
#include "xenia/base/assert.h"
|
|
|
|
#include "xenia/base/clock.h"
|
|
|
|
#include "xenia/base/cvar.h"
|
|
|
|
#include "xenia/base/logging.h"
|
|
|
|
#include "xenia/base/math.h"
|
|
|
|
#include "xenia/base/profiling.h"
|
|
|
|
#include "xenia/gpu/gpu_flags.h"
|
|
|
|
#include "xenia/gpu/register_file.h"
|
|
|
|
#include "xenia/gpu/texture_info.h"
|
|
|
|
#include "xenia/gpu/texture_util.h"
|
|
|
|
#include "xenia/gpu/xenos.h"
|
|
|
|
|
|
|
|
DEFINE_int32(
    draw_resolution_scale_x, 1,
    "Integer pixel width scale used for scaling the rendering resolution "
    "opaquely to the game.\n"
    "1, 2 and 3 may be supported, but support of anything above 1 depends on "
    "the device properties, such as whether it supports sparse binding / tiled "
    "resources, the number of virtual address bits per resource, and other "
    "factors.\n"
    "Various effects and parts of game rendering pipelines may work "
    "incorrectly as pixels become ambiguous from the game's perspective and "
    "because half-pixel offset (which normally doesn't affect coverage when "
    "MSAA isn't used) becomes full-pixel.",
    "GPU");
DEFINE_int32(
    draw_resolution_scale_y, 1,
    "Integer pixel height scale used for scaling the rendering resolution "
    "opaquely to the game.\n"
    "See draw_resolution_scale_x for more information.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_soft, 384,
    "Maximum host texture memory usage (in megabytes) above which old textures "
    "will be destroyed.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_soft_lifetime, 30,
    "Seconds a texture should be unused to be considered old enough to be "
    "deleted if texture memory usage exceeds texture_cache_memory_limit_soft.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_hard, 768,
    "Maximum host texture memory usage (in megabytes) above which textures "
    "will be destroyed as soon as possible.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_render_to_texture, 24,
    "Part of the host texture memory budget (in megabytes) that will be scaled "
    "by the current drawing resolution scale.\n"
    "If texture_cache_memory_limit_soft, for instance, is 384, and this is 24, "
    "it will be assumed that the game will be using roughly 24 MB of "
    "render-to-texture (resolve) targets and 384 - 24 = 360 MB of regular "
    "textures - so with 2x2 resolution scaling, the soft limit will be 360 + "
    "96 MB, and with 3x3, it will be 360 + 216 MB.",
    "GPU");

namespace xe {
namespace gpu {

const TextureCache::LoadShaderInfo
    TextureCache::load_shader_info_[kLoadShaderCount] = {
        // k8bpb
        {3, 4, 1, 4},
        // k16bpb
        {4, 4, 2, 4},
        // k32bpb
        {4, 4, 4, 3},
        // k64bpb
        {4, 4, 8, 2},
        // k128bpb
        {4, 4, 16, 1},
        // kR5G5B5A1ToB5G5R5A1
        {4, 4, 2, 4},
        // kR5G6B5ToB5G6R5
        {4, 4, 2, 4},
        // kR5G5B6ToB5G6R5WithRBGASwizzle
        {4, 4, 2, 4},
        // kRGBA4ToBGRA4
        {4, 4, 2, 4},
        // kRGBA4ToARGB4
        {4, 4, 2, 4},
        // kGBGR8ToGRGB8
        {4, 4, 4, 3},
        // kGBGR8ToRGB8
        {4, 4, 8, 3},
        // kBGRG8ToRGBG8
        {4, 4, 4, 3},
        // kBGRG8ToRGB8
        {4, 4, 8, 3},
        // kR10G11B11ToRGBA16
        {4, 4, 8, 3},
        // kR10G11B11ToRGBA16SNorm
        {4, 4, 8, 3},
        // kR11G11B10ToRGBA16
        {4, 4, 8, 3},
        // kR11G11B10ToRGBA16SNorm
        {4, 4, 8, 3},
        // kR16UNormToFloat
        {4, 4, 2, 4},
        // kR16SNormToFloat
        {4, 4, 2, 4},
        // kRG16UNormToFloat
        {4, 4, 4, 3},
        // kRG16SNormToFloat
        {4, 4, 4, 3},
        // kRGBA16UNormToFloat
        {4, 4, 8, 2},
        // kRGBA16SNormToFloat
        {4, 4, 8, 2},
        // kDXT1ToRGBA8
        {4, 4, 4, 2},
        // kDXT3ToRGBA8
        {4, 4, 4, 1},
        // kDXT5ToRGBA8
        {4, 4, 4, 1},
        // kDXNToRG8
        {4, 4, 2, 1},
        // kDXT3A
        {4, 4, 1, 2},
        // kDXT3AAs1111ToBGRA4
        {4, 4, 2, 2},
        // kDXT3AAs1111ToARGB4
        {4, 4, 2, 2},
        // kDXT5AToR8
        {4, 4, 1, 2},
        // kCTX1
        {4, 4, 2, 2},
        // kDepthUnorm
        {4, 4, 4, 3},
        // kDepthFloat
        {4, 4, 4, 3},
};

TextureCache::TextureCache(const RegisterFile& register_file,
                           SharedMemory& shared_memory,
                           uint32_t draw_resolution_scale_x,
                           uint32_t draw_resolution_scale_y)
    : register_file_(register_file),
      shared_memory_(shared_memory),
      draw_resolution_scale_x_(draw_resolution_scale_x),
      draw_resolution_scale_y_(draw_resolution_scale_y) {
  assert_true(draw_resolution_scale_x >= 1);
  assert_true(draw_resolution_scale_x <= kMaxDrawResolutionScaleAlongAxis);
  assert_true(draw_resolution_scale_y >= 1);
  assert_true(draw_resolution_scale_y <= kMaxDrawResolutionScaleAlongAxis);

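  // Editorial note on the two-level page tracking set up below (the sizes are
  // the ones visible in the code): scaled_resolve_pages_ holds one bit per
  // 4096-byte page of the shared memory buffer, 32 pages per uint32_t word,
  // and scaled_resolve_pages_l2_ holds one bit per scaled_resolve_pages_ word,
  // 64 words per uint64_t, so IsRangeScaledResolved can reject most ranges by
  // scanning only the second level.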
  if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
    constexpr uint32_t kScaledResolvePageDwordCount =
        SharedMemory::kBufferSize / 4096 / 32;
    scaled_resolve_pages_ = std::unique_ptr<uint32_t[]>(
        new uint32_t[kScaledResolvePageDwordCount]);
    std::memset(scaled_resolve_pages_.get(), 0,
                kScaledResolvePageDwordCount * sizeof(uint32_t));
    std::memset(scaled_resolve_pages_l2_, 0, sizeof(scaled_resolve_pages_l2_));
    scaled_resolve_global_watch_handle_ = shared_memory.RegisterGlobalWatch(
        ScaledResolveGlobalWatchCallbackThunk, this);
  }
}

TextureCache::~TextureCache() {
  DestroyAllTextures(true);

  if (scaled_resolve_global_watch_handle_) {
    shared_memory().UnregisterGlobalWatch(scaled_resolve_global_watch_handle_);
  }
}

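// Returns the clamped scale factors via x_out and y_out, and reports whether
// the configured cvar values could be used as-is (false means at least one of
// them had to be clamped to [1, kMaxDrawResolutionScaleAlongAxis]).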
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
                                                uint32_t& y_out) {
  uint32_t config_x =
      uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
  uint32_t config_y =
      uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
  uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
  uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
  x_out = clamped_x;
  y_out = clamped_y;
  return clamped_x == config_x && clamped_y == config_y;
}

void TextureCache::ClearCache() { DestroyAllTextures(); }

void TextureCache::CompletedSubmissionUpdated(
    uint64_t completed_submission_index) {
  // If memory usage is too high, destroy unused textures.
  uint64_t current_time = xe::Clock::QueryHostUptimeMillis();
  // texture_cache_memory_limit_render_to_texture is assumed to be included in
  // texture_cache_memory_limit_soft and texture_cache_memory_limit_hard at 1x,
  // so 1 is subtracted from the scale.
  uint32_t limit_scaled_resolve_add_mb =
      cvars::texture_cache_memory_limit_render_to_texture *
      (draw_resolution_scale_x() * draw_resolution_scale_y() - 1);
  uint32_t limit_soft_mb =
      cvars::texture_cache_memory_limit_soft + limit_scaled_resolve_add_mb;
  uint32_t limit_hard_mb =
      cvars::texture_cache_memory_limit_hard + limit_scaled_resolve_add_mb;
  uint32_t limit_soft_lifetime =
      cvars::texture_cache_memory_limit_soft_lifetime * 1000;
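  // For example, with the default cvar values and 2x2 scaling, the addition is
  // 24 * (2 * 2 - 1) = 72 MB, so the soft limit becomes 384 + 72 = 456 MB and
  // the hard limit 768 + 72 = 840 MB - the same 456 MB as the "360 + 96 MB"
  // example in the texture_cache_memory_limit_render_to_texture description.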
  bool destroyed_any = false;
  while (texture_used_first_ != nullptr) {
    uint64_t total_host_memory_usage_mb =
        (textures_total_host_memory_usage_ + ((UINT32_C(1) << 20) - 1)) >> 20;
    bool limit_hard_exceeded = total_host_memory_usage_mb > limit_hard_mb;
    if (total_host_memory_usage_mb <= limit_soft_mb && !limit_hard_exceeded) {
      break;
    }
    Texture* texture = texture_used_first_;
    if (texture->last_usage_submission_index() > completed_submission_index) {
      break;
    }
    if (!limit_hard_exceeded &&
        (texture->last_usage_time() + limit_soft_lifetime) > current_time) {
      break;
    }
    if (!destroyed_any) {
      destroyed_any = true;
      // The texture being destroyed might have been bound in the previous
      // submissions, and nothing has overwritten the binding yet, so
      // completion of the submission where the texture was last actually used
      // on the GPU doesn't imply that it's not bound currently. Reset bindings
      // if any texture has been destroyed.
      ResetTextureBindings();
    }
    // Remove the texture from the map and destroy it via its unique_ptr.
    auto found_texture_it = textures_.find(texture->key());
    assert_true(found_texture_it != textures_.end());
    if (found_texture_it != textures_.end()) {
      assert_true(found_texture_it->second.get() == texture);
      textures_.erase(found_texture_it);
      // `texture` is invalid now.
    }
  }
  if (destroyed_any) {
    COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
  }
}

void TextureCache::BeginSubmission(uint64_t new_submission_index) {
  assert_true(new_submission_index > current_submission_index_);
  current_submission_index_ = new_submission_index;
  current_submission_time_ = xe::Clock::QueryHostUptimeMillis();
}

void TextureCache::BeginFrame() {
  // In case there was a failure to create something in the previous frame,
  // make sure bindings are reset so a new attempt will surely be made if the
  // texture is requested again.
  ResetTextureBindings();
}

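// Marks every 4 KB page touched by [start_unscaled, start_unscaled +
// length_unscaled) as containing scaled resolve data. The first and last
// 32-page words of the range are partially masked so that pages outside the
// range are left untouched, and the corresponding bits of the second-level
// bitmap are set so IsRangeScaledResolved can find the words later.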
void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled,
                                       uint32_t length_unscaled) {
  if (length_unscaled == 0) {
    return;
  }
  start_unscaled &= 0x1FFFFFFF;
  length_unscaled = std::min(length_unscaled, 0x20000000 - start_unscaled);

  if (IsDrawResolutionScaled()) {
    uint32_t page_first = start_unscaled >> 12;
    uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
    uint32_t block_first = page_first >> 5;
    uint32_t block_last = page_last >> 5;
    auto global_lock = global_critical_region_.Acquire();
    for (uint32_t i = block_first; i <= block_last; ++i) {
      uint32_t add_bits = UINT32_MAX;
      if (i == block_first) {
        add_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
      }
      if (i == block_last && (page_last & 31) != 31) {
        add_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
      }
      scaled_resolve_pages_[i] |= add_bits;
      scaled_resolve_pages_l2_[i >> 6] |= UINT64_C(1) << (i & 63);
    }
  }

  // Invalidate textures. Toggling individual textures between scaled and
  // unscaled also relies on invalidation through shared memory.
  shared_memory().RangeWrittenByGpu(start_unscaled, length_unscaled, true);
}

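// A swizzle here is four 3-bit selectors, one per destination component.
// Selectors 0-3 pick one of the four fetched components, 4 and 5 are the
// constants 0 and 1, and 6 and 7 are not valid; the 0-3 selectors are remapped
// through the host format's own swizzle, while 6 and 7 are clamped to the
// constant selectors.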
uint32_t TextureCache::GuestToHostSwizzle(uint32_t guest_swizzle,
                                          uint32_t host_format_swizzle) {
  uint32_t host_swizzle = 0;
  for (uint32_t i = 0; i < 4; ++i) {
    uint32_t guest_swizzle_component = (guest_swizzle >> (3 * i)) & 0b111;
    uint32_t host_swizzle_component;
    if (guest_swizzle_component >= xenos::XE_GPU_TEXTURE_SWIZZLE_0) {
      // Get rid of 6 and 7 values (to prevent host GPU errors if the game has
      // something broken) the simple way - by changing them to 4 (0) and
      // 5 (1).
      host_swizzle_component = guest_swizzle_component & 0b101;
    } else {
      host_swizzle_component =
          (host_format_swizzle >> (3 * guest_swizzle_component)) & 0b111;
    }
    host_swizzle |= host_swizzle_component << (3 * i);
  }
  return host_swizzle;
}

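// Each set bit in used_texture_mask selects a texture fetch constant slot to
// update. A texture fetch constant occupies six 32-bit registers, hence the
// index * 6 offset from XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 when the slot
// is read below.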
void TextureCache::RequestTextures(uint32_t used_texture_mask) {
  const auto& regs = register_file();

  if (texture_became_outdated_.exchange(false, std::memory_order_acquire)) {
    // A texture has become outdated - make sure whether textures are outdated
    // is rechecked in this draw and in subsequent ones to reload the new data
    // if needed.
    ResetTextureBindings();
  }

  // Update the texture keys and the textures.
  uint32_t bindings_changed = 0;
  uint32_t textures_remaining = used_texture_mask & ~texture_bindings_in_sync_;
  uint32_t index = 0;
  while (xe::bit_scan_forward(textures_remaining, &index)) {
    uint32_t index_bit = UINT32_C(1) << index;
    textures_remaining &= ~index_bit;
    TextureBinding& binding = texture_bindings_[index];
    const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
        XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
    TextureKey old_key = binding.key;
    uint8_t old_swizzled_signs = binding.swizzled_signs;
    BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);
    texture_bindings_in_sync_ |= index_bit;
    if (!binding.key.is_valid) {
      if (old_key.is_valid) {
        bindings_changed |= index_bit;
      }
      binding.Reset();
      continue;
    }
    uint32_t old_host_swizzle = binding.host_swizzle;
    binding.host_swizzle =
        GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(binding.key));

    // Check whether the unsigned and the signed versions of the texture need
    // to be loaded (if the format is emulated with different host bit
    // representations for signed and unsigned - otherwise only the unsigned
    // one is loaded).
    bool key_changed = binding.key != old_key;
    bool any_sign_was_not_signed =
        texture_util::IsAnySignNotSigned(old_swizzled_signs);
    bool any_sign_was_signed =
        texture_util::IsAnySignSigned(old_swizzled_signs);
    bool any_sign_is_not_signed =
        texture_util::IsAnySignNotSigned(binding.swizzled_signs);
    bool any_sign_is_signed =
        texture_util::IsAnySignSigned(binding.swizzled_signs);
    if (key_changed || binding.host_swizzle != old_host_swizzle ||
        any_sign_is_not_signed != any_sign_was_not_signed ||
        any_sign_is_signed != any_sign_was_signed) {
      bindings_changed |= index_bit;
    }
    bool load_unsigned_data = false, load_signed_data = false;
    if (IsSignedVersionSeparateForFormat(binding.key)) {
      // Can reuse previously loaded unsigned/signed versions if the key is the
      // same and the texture was previously bound as unsigned/signed
      // respectively (checking the previous values of signedness rather than
      // binding.texture != nullptr and binding.texture_signed != nullptr also
      // prevents repeated attempts to load the texture if it has failed to
      // load).
      if (any_sign_is_not_signed) {
        if (key_changed || !any_sign_was_not_signed) {
          binding.texture = FindOrCreateTexture(binding.key);
          load_unsigned_data = true;
        }
      } else {
        binding.texture = nullptr;
      }
      if (any_sign_is_signed) {
        if (key_changed || !any_sign_was_signed) {
          TextureKey signed_key = binding.key;
          signed_key.signed_separate = 1;
          binding.texture_signed = FindOrCreateTexture(signed_key);
          load_signed_data = true;
        }
      } else {
        binding.texture_signed = nullptr;
      }
    } else {
      // Same resource for both unsigned and signed, but descriptor formats may
      // be different.
      if (key_changed) {
        binding.texture = FindOrCreateTexture(binding.key);
        load_unsigned_data = true;
      }
      binding.texture_signed = nullptr;
    }
    if (load_unsigned_data && binding.texture != nullptr) {
      LoadTextureData(*binding.texture);
    }
    if (load_signed_data && binding.texture_signed != nullptr) {
      LoadTextureData(*binding.texture_signed);
    }
  }
  if (bindings_changed) {
    UpdateTextureBindingsImpl(bindings_changed);
  }
}

const char* TextureCache::TextureKey::GetLogDimensionName(
    xenos::DataDimension dimension) {
  switch (dimension) {
    case xenos::DataDimension::k1D:
      return "1D";
    case xenos::DataDimension::k2DOrStacked:
      return "2D";
    case xenos::DataDimension::k3D:
      return "3D";
    case xenos::DataDimension::kCube:
      return "cube";
    default:
      assert_unhandled_case(dimension);
      return "unknown";
  }
}

void TextureCache::TextureKey::LogAction(const char* action) const {
  XELOGGPU(
      "{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
      "base at 0x{:08X} (pitch {}), mips at 0x{:08X}",
      action, tiled ? "tiled" : "linear", scaled_resolve ? "scaled " : "",
      GetWidth(), GetHeight(), GetDepthOrArraySize(), GetLogDimensionName(),
      FormatInfo::Get(format)->name, mip_max_level + 1, packed_mips ? "" : "un",
      mip_max_level != 0 ? "s" : "", base_page << 12, pitch << 5,
      mip_page << 12);
}

void TextureCache::Texture::LogAction(const char* action) const {
  XELOGGPU(
      "{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
      "base at 0x{:08X} (pitch {}, size 0x{:08X}), mips at 0x{:08X} (size "
      "0x{:08X})",
      action, key_.tiled ? "tiled" : "linear",
      key_.scaled_resolve ? "scaled " : "", key_.GetWidth(), key_.GetHeight(),
      key_.GetDepthOrArraySize(), key_.GetLogDimensionName(),
      FormatInfo::Get(key_.format)->name, key_.mip_max_level + 1,
      key_.packed_mips ? "" : "un", key_.mip_max_level != 0 ? "s" : "",
      key_.base_page << 12, key_.pitch << 5, GetGuestBaseSize(),
      key_.mip_page << 12, GetGuestMipsSize());
}

// The texture must be in the recent usage list. Place it at the most recently
// used end of the list immediately, because after creation the texture will
// likely be used right away, and it should not be destroyed straight after
// creation if dropping of old textures is performed. The list is maintained by
// the Texture, not the TextureCache itself (unlike the `textures_` container).
TextureCache::Texture::Texture(TextureCache& texture_cache,
                               const TextureKey& key)
    : texture_cache_(texture_cache),
      key_(key),
      guest_layout_(key.GetGuestLayout()),
      base_resolved_(key.scaled_resolve),
      mips_resolved_(key.scaled_resolve),
      last_usage_submission_index_(texture_cache.current_submission_index_),
      last_usage_time_(texture_cache.current_submission_time_),
      used_previous_(texture_cache.texture_used_last_),
      used_next_(nullptr) {
  if (texture_cache.texture_used_last_) {
    texture_cache.texture_used_last_->used_next_ = this;
  } else {
    texture_cache.texture_used_first_ = this;
  }
  texture_cache.texture_used_last_ = this;

  // Never try to upload data that doesn't exist.
  base_outdated_ = guest_layout().base.level_data_extent_bytes != 0;
  mips_outdated_ = guest_layout().mips_total_extent_bytes != 0;
}

TextureCache::Texture::~Texture() {
  if (mips_watch_handle_) {
    texture_cache().shared_memory().UnwatchMemoryRange(mips_watch_handle_);
  }
  if (base_watch_handle_) {
    texture_cache().shared_memory().UnwatchMemoryRange(base_watch_handle_);
  }

  if (used_previous_) {
    used_previous_->used_next_ = used_next_;
  } else {
    texture_cache_.texture_used_first_ = used_next_;
  }
  if (used_next_) {
    used_next_->used_previous_ = used_previous_;
  } else {
    texture_cache_.texture_used_last_ = used_previous_;
  }

  texture_cache_.UpdateTexturesTotalHostMemoryUsage(0, host_memory_usage_);
}

void TextureCache::Texture::MakeUpToDateAndWatch(
    const std::unique_lock<std::recursive_mutex>& global_lock) {
  SharedMemory& shared_memory = texture_cache().shared_memory();
  if (base_outdated_) {
    assert_not_zero(GetGuestBaseSize());
    base_outdated_ = false;
    base_watch_handle_ = shared_memory.WatchMemoryRange(
        key().base_page << 12, GetGuestBaseSize(), TextureCache::WatchCallback,
        this, nullptr, 0);
  }
  if (mips_outdated_) {
    assert_not_zero(GetGuestMipsSize());
    mips_outdated_ = false;
    mips_watch_handle_ = shared_memory.WatchMemoryRange(
        key().mip_page << 12, GetGuestMipsSize(), TextureCache::WatchCallback,
        this, nullptr, 1);
  }
}

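// The usage list is ordered from the least recently used texture
// (texture_used_first_) to the most recently used one (texture_used_last_);
// CompletedSubmissionUpdated trims from the front, so marking a texture as
// used simply unlinks it and reinserts it at the tail.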
void TextureCache::Texture::MarkAsUsed() {
  assert_true(last_usage_submission_index_ <=
              texture_cache_.current_submission_index_);
  // This is called very frequently, don't relink unless needed for caching.
  if (last_usage_submission_index_ >=
      texture_cache_.current_submission_index_) {
    return;
  }
  last_usage_submission_index_ = texture_cache_.current_submission_index_;
  last_usage_time_ = texture_cache_.current_submission_time_;
  if (used_next_ == nullptr) {
    // Already the most recently used.
    return;
  }
  if (used_previous_ != nullptr) {
    used_previous_->used_next_ = used_next_;
  } else {
    texture_cache_.texture_used_first_ = used_next_;
  }
  used_next_->used_previous_ = used_previous_;
  used_previous_ = texture_cache_.texture_used_last_;
  used_next_ = nullptr;
  texture_cache_.texture_used_last_->used_next_ = this;
  texture_cache_.texture_used_last_ = this;
}

void TextureCache::Texture::WatchCallback(
    [[maybe_unused]] const std::unique_lock<std::recursive_mutex>& global_lock,
    bool is_mip) {
  if (is_mip) {
    assert_not_zero(GetGuestMipsSize());
    mips_outdated_ = true;
    mips_watch_handle_ = nullptr;
  } else {
    assert_not_zero(GetGuestBaseSize());
    base_outdated_ = true;
    base_watch_handle_ = nullptr;
  }
}

void TextureCache::WatchCallback(
    const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
    void* data, uint64_t argument, bool invalidated_by_gpu) {
  Texture& texture = *static_cast<Texture*>(context);
  texture.WatchCallback(global_lock, argument != 0);
  texture.texture_cache().texture_became_outdated_.store(
      true, std::memory_order_release);
}

void TextureCache::DestroyAllTextures(bool from_destructor) {
  ResetTextureBindings(from_destructor);
  textures_.clear();
  COUNT_profile_set("gpu/texture_cache/textures", 0);
}

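// A texture gets the scaled-resolve variant of its key when any page of its
// base or mip data lies in a range previously written by a resolve at the
// scaled resolution (per IsRangeScaledResolved); only in that case are the
// host dimensions multiplied by the draw resolution scale.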
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
  // Check if the texture is a scaled resolve texture.
  if (IsDrawResolutionScaled() && key.tiled &&
      IsScaledResolveSupportedForFormat(key)) {
    texture_util::TextureGuestLayout scaled_resolve_guest_layout =
        key.GetGuestLayout();
    if ((scaled_resolve_guest_layout.base.level_data_extent_bytes &&
         IsRangeScaledResolved(
             key.base_page << 12,
             scaled_resolve_guest_layout.base.level_data_extent_bytes)) ||
        (scaled_resolve_guest_layout.mips_total_extent_bytes &&
         IsRangeScaledResolved(
             key.mip_page << 12,
             scaled_resolve_guest_layout.mips_total_extent_bytes))) {
      key.scaled_resolve = 1;
    }
  }

  uint32_t host_width = key.GetWidth();
  uint32_t host_height = key.GetHeight();
  if (key.scaled_resolve) {
    host_width *= draw_resolution_scale_x();
    host_height *= draw_resolution_scale_y();
  }
  // With 3x resolution scaling, a 2D texture may become bigger than the
  // Direct3D 11 limit, and with 2x, a 3D one as well.
  // TODO(Triang3l): Skip mips on Vulkan in this case - the minimum requirement
  // there is 4096, which is below the Xenos maximum texture size of 8192.
  uint32_t max_host_width_height = GetMaxHostTextureWidthHeight(key.dimension);
  uint32_t max_host_depth_or_array_size =
      GetMaxHostTextureDepthOrArraySize(key.dimension);
  if (host_width > max_host_width_height ||
      host_height > max_host_width_height ||
      key.GetDepthOrArraySize() > max_host_depth_or_array_size) {
    return nullptr;
  }

  // Try to find an existing texture.
  // TODO(Triang3l): Reuse a texture with mip_page unchanged, but base_page
  // previously 0, now not 0, to save memory - common case in streaming.
  auto found_texture_it = textures_.find(key);
  if (found_texture_it != textures_.end()) {
    return found_texture_it->second.get();
  }

  // Create the texture and add it to the map.
  Texture* texture;
  {
    std::unique_ptr<Texture> new_texture = CreateTexture(key);
    if (!new_texture) {
      key.LogAction("Failed to create");
      return nullptr;
    }
    assert_true(new_texture->key() == key);
    texture =
        textures_.emplace(key, std::move(new_texture)).first->second.get();
  }
  COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
  texture->LogAction("Created");
  return texture;
}

bool TextureCache::LoadTextureData(Texture& texture) {
  // Check what needs to be uploaded.
  bool base_outdated, mips_outdated;
  {
    auto global_lock = global_critical_region_.Acquire();
    base_outdated = texture.base_outdated(global_lock);
    mips_outdated = texture.mips_outdated(global_lock);
  }
  if (!base_outdated && !mips_outdated) {
    return true;
  }

  TextureKey texture_key = texture.key();

  // The implementation may load multiple blocks at once via accesses of up to
  // 128 bits (R32G32B32A32_UINT), so the size is aligned to this value to make
  // sure that if the texture is small (especially if it's linear), the last
  // blocks won't be cut off (hosts may return 0, 0, 0, 0 for the whole
  // R32G32B32A32_UINT access for the non-16-aligned tail even if 1...15 bytes
  // are actually provided for it).

  // Request uploading of the texture data to the shared memory.
  // This is also necessary when resolution scaling is used - the texture cache
  // relies on shared memory for invalidation of both unscaled and scaled
  // textures. Plus, a texture may be unscaled partially, when only a portion
  // of its pages is invalidated, and in this case the texture from the shared
  // memory will be needed to load the unscaled parts.
  // TODO(Triang3l): Load unscaled parts.
  bool base_resolved = texture.GetBaseResolved();
  if (base_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.base_page << 12,
            xe::align(texture.GetGuestBaseSize(), UINT32_C(16)),
            texture_key.scaled_resolve ? nullptr : &base_resolved)) {
      return false;
    }
  }
  bool mips_resolved = texture.GetMipsResolved();
  if (mips_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.mip_page << 12,
            xe::align(texture.GetGuestMipsSize(), UINT32_C(16)),
            texture_key.scaled_resolve ? nullptr : &mips_resolved)) {
      return false;
    }
  }
  if (texture_key.scaled_resolve) {
    // Make sure all the scaled resolve memory is resident and accessible from
    // the shader, including any possible padding that hasn't yet been touched
    // by an actual resolve, but is still included in the texture size, so the
    // GPU won't be trying to access unmapped memory.
    if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12,
                                            texture.GetGuestBaseSize(), 4)) {
      return false;
    }
    if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12,
                                            texture.GetGuestMipsSize(), 4)) {
      return false;
    }
  }

  // Actually load the texture data.
  if (!LoadTextureDataFromResidentMemoryImpl(texture, base_outdated,
                                             mips_outdated)) {
    return false;
  }

  // Update the source of the texture (resolve vs. CPU or memexport) for
  // purposes of handling piecewise gamma emulation via sRGB and for resolution
  // scale in sampling offsets.
  if (!texture_key.scaled_resolve) {
    texture.SetBaseResolved(base_resolved);
    texture.SetMipsResolved(mips_resolved);
  }

  // Mark the ranges as uploaded and watch them. This is needed for scaled
  // resolves as well to detect when the CPU wants to reuse the memory for a
  // regular texture or a vertex buffer, and thus the scaled resolve version is
  // not up to date anymore.
  texture.MakeUpToDateAndWatch(global_critical_region_.Acquire());

  texture.LogAction("Loaded");

  return true;
}

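// In TextureKey, base_page and mip_page are 4 KB page indices of the guest
// base and mip addresses (address >> 12), and pitch is stored divided by 32,
// matching the << 12 and << 5 conversions in the LogAction methods above.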
void TextureCache::BindingInfoFromFetchConstant(
    const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
    uint8_t* swizzled_signs_out) {
  // Reset the key and the signedness.
  key_out.MakeInvalid();
  if (swizzled_signs_out != nullptr) {
    *swizzled_signs_out =
        uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
  }

  switch (fetch.type) {
    case xenos::FetchConstantType::kTexture:
      break;
    case xenos::FetchConstantType::kInvalidTexture:
      if (cvars::gpu_allow_invalid_fetch_constants) {
        break;
      }
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "has \"invalid\" type! This is incorrect behavior, but you can try "
          "bypassing this by launching Xenia with "
          "--gpu_allow_invalid_fetch_constants=true.",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
    default:
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "is completely invalid!",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
  }

  uint32_t width_minus_1, height_minus_1, depth_or_array_size_minus_1;
  uint32_t base_page, mip_page, mip_max_level;
  texture_util::GetSubresourcesFromFetchConstant(
      fetch, &width_minus_1, &height_minus_1, &depth_or_array_size_minus_1,
      &base_page, &mip_page, nullptr, &mip_max_level);
  if (base_page == 0 && mip_page == 0) {
    // No texture data at all.
    return;
  }
  if (fetch.dimension == xenos::DataDimension::k1D) {
    bool is_invalid_1d = false;
    // TODO(Triang3l): Support long 1D textures.
    if (width_minus_1 >= xenos::kTexture2DCubeMaxWidthHeight) {
      XELOGE(
          "1D texture is too wide ({}) - ignoring! Report the game to Xenia "
          "developers",
          width_minus_1 + 1);
      is_invalid_1d = true;
    }
    assert_false(fetch.tiled);
    if (fetch.tiled) {
      XELOGE(
          "1D texture has tiling enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to Xenia "
          "developers");
      is_invalid_1d = true;
    }
    assert_false(fetch.packed_mips);
    if (fetch.packed_mips) {
      XELOGE(
          "1D texture has packed mips enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to Xenia "
          "developers");
      is_invalid_1d = true;
    }
    if (is_invalid_1d) {
      return;
    }
  }

  xenos::TextureFormat format = GetBaseFormat(fetch.format);

  key_out.base_page = base_page;
  key_out.mip_page = mip_page;
  key_out.dimension = fetch.dimension;
  key_out.width_minus_1 = width_minus_1;
  key_out.height_minus_1 = height_minus_1;
  key_out.depth_or_array_size_minus_1 = depth_or_array_size_minus_1;
  key_out.pitch = fetch.pitch;
  key_out.mip_max_level = mip_max_level;
  key_out.tiled = fetch.tiled;
  key_out.packed_mips = fetch.packed_mips;
  key_out.format = format;
  key_out.endianness = fetch.endianness;

  key_out.is_valid = 1;

  if (swizzled_signs_out != nullptr) {
    *swizzled_signs_out = texture_util::SwizzleSigns(fetch);
  }
}

void TextureCache::ResetTextureBindings(bool from_destructor) {
  uint32_t bindings_reset = 0;
  for (size_t i = 0; i < texture_bindings_.size(); ++i) {
    TextureBinding& binding = texture_bindings_[i];
    if (!binding.key.is_valid) {
      continue;
    }
    binding.Reset();
    bindings_reset |= UINT32_C(1) << i;
  }
  texture_bindings_in_sync_ &= ~bindings_reset;
  if (!from_destructor && bindings_reset) {
    UpdateTextureBindingsImpl(bindings_reset);
  }
}

void TextureCache::UpdateTexturesTotalHostMemoryUsage(uint64_t add,
                                                      uint64_t subtract) {
  textures_total_host_memory_usage_ =
      textures_total_host_memory_usage_ - subtract + add;
  COUNT_profile_set("gpu/texture_cache/total_host_memory_usage_mb",
                    uint32_t((textures_total_host_memory_usage_ +
                              ((UINT32_C(1) << 20) - 1)) >>
                             20));
}

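// Queries the two-level page bitmap built in MarkRangeAsResolved: the outer
// loop walks the second-level words covering the range, and only first-level
// words whose second-level bit is set are examined at page granularity, so a
// range that never overlapped a resolve is rejected after a handful of 64-bit
// reads.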
bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
                                         uint32_t length_unscaled) {
  if (!IsDrawResolutionScaled()) {
    return false;
  }

  start_unscaled = std::min(start_unscaled, SharedMemory::kBufferSize);
  length_unscaled =
      std::min(length_unscaled, SharedMemory::kBufferSize - start_unscaled);
  if (!length_unscaled) {
    return false;
  }

  // Two-level check for faster rejection since resolve targets are usually
  // placed in relatively small and localized memory portions (confirmed by
  // testing - pretty much every time the deeper level was entered, the texture
  // was a resolve target).
  uint32_t page_first = start_unscaled >> 12;
  uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
  uint32_t block_first = page_first >> 5;
  uint32_t block_last = page_last >> 5;
  uint32_t l2_block_first = block_first >> 6;
  uint32_t l2_block_last = block_last >> 6;
  auto global_lock = global_critical_region_.Acquire();
  for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
    uint64_t l2_block = scaled_resolve_pages_l2_[i];
    if (i == l2_block_first) {
      l2_block &= ~((UINT64_C(1) << (block_first & 63)) - 1);
    }
    if (i == l2_block_last && (block_last & 63) != 63) {
      l2_block &= (UINT64_C(1) << ((block_last & 63) + 1)) - 1;
    }
    uint32_t block_relative_index;
    while (xe::bit_scan_forward(l2_block, &block_relative_index)) {
      l2_block &= ~(UINT64_C(1) << block_relative_index);
      uint32_t block_index = (i << 6) + block_relative_index;
      uint32_t check_bits = UINT32_MAX;
      if (block_index == block_first) {
        check_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
      }
      if (block_index == block_last && (page_last & 31) != 31) {
        check_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
      }
      if (scaled_resolve_pages_[block_index] & check_bits) {
        return true;
      }
    }
  }
  return false;
}

void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
    const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
    uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
  TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
  texture_cache->ScaledResolveGlobalWatchCallback(
      global_lock, address_first, address_last, invalidated_by_gpu);
}

void TextureCache::ScaledResolveGlobalWatchCallback(
    const std::unique_lock<std::recursive_mutex>& global_lock,
    uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
  assert_true(IsDrawResolutionScaled());
  if (invalidated_by_gpu) {
    // Resolves themselves do exactly the opposite of what this should do.
    return;
  }
  // Mark scaled resolve ranges as non-scaled. Textures themselves will be
  // invalidated by their shared memory watches.
  uint32_t resolve_page_first = address_first >> 12;
  uint32_t resolve_page_last = address_last >> 12;
  uint32_t resolve_block_first = resolve_page_first >> 5;
  uint32_t resolve_block_last = resolve_page_last >> 5;
  uint32_t resolve_l2_block_first = resolve_block_first >> 6;
  uint32_t resolve_l2_block_last = resolve_block_last >> 6;
  for (uint32_t i = resolve_l2_block_first; i <= resolve_l2_block_last; ++i) {
    uint64_t resolve_l2_block = scaled_resolve_pages_l2_[i];
    uint32_t resolve_block_relative_index;
    while (xe::bit_scan_forward(resolve_l2_block,
                                &resolve_block_relative_index)) {
      resolve_l2_block &= ~(UINT64_C(1) << resolve_block_relative_index);
      uint32_t resolve_block_index = (i << 6) + resolve_block_relative_index;
      uint32_t resolve_keep_bits = 0;
      if (resolve_block_index == resolve_block_first) {
        resolve_keep_bits |= (UINT32_C(1) << (resolve_page_first & 31)) - 1;
      }
      if (resolve_block_index == resolve_block_last &&
          (resolve_page_last & 31) != 31) {
        resolve_keep_bits |=
            ~((UINT32_C(1) << ((resolve_page_last & 31) + 1)) - 1);
      }
      scaled_resolve_pages_[resolve_block_index] &= resolve_keep_bits;
      if (scaled_resolve_pages_[resolve_block_index] == 0) {
        scaled_resolve_pages_l2_[i] &=
            ~(UINT64_C(1) << resolve_block_relative_index);
      }
    }
  }
}

}  // namespace gpu
}  // namespace xe