/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/texture_cache.h"

#include <algorithm>
#include <cstring>
#include <memory>

#include "xenia/base/assert.h"
#include "xenia/base/clock.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"

DEFINE_int32(
    draw_resolution_scale_x, 1,
    "Integer pixel width scale used for scaling the rendering resolution "
    "opaquely to the game.\n"
    "1, 2 and 3 may be supported, but support of anything above 1 depends on "
    "the device properties, such as whether it supports sparse binding / "
    "tiled resources, the number of virtual address bits per resource, and "
    "other factors.\n"
    "Various effects and parts of game rendering pipelines may work "
    "incorrectly as pixels become ambiguous from the game's perspective and "
    "because half-pixel offset (which normally doesn't affect coverage when "
    "MSAA isn't used) becomes full-pixel.",
    "GPU");
DEFINE_int32(
    draw_resolution_scale_y, 1,
    "Integer pixel height scale used for scaling the rendering resolution "
    "opaquely to the game.\n"
    "See draw_resolution_scale_x for more information.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_soft, 384,
    "Maximum host texture memory usage (in megabytes) above which old "
    "textures will be destroyed.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_soft_lifetime, 30,
    "Seconds a texture should be unused to be considered old enough to be "
    "deleted if texture memory usage exceeds texture_cache_memory_limit_soft.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_hard, 768,
    "Maximum host texture memory usage (in megabytes) above which textures "
    "will be destroyed as soon as possible.",
    "GPU");
DEFINE_uint32(
    texture_cache_memory_limit_render_to_texture, 24,
    "Part of the host texture memory budget (in megabytes) that will be "
    "scaled by the current drawing resolution scale.\n"
    "If texture_cache_memory_limit_soft, for instance, is 384, and this is "
    "24, it will be assumed that the game will be using roughly 24 MB of "
    "render-to-texture (resolve) targets and 384 - 24 = 360 MB of regular "
    "textures - so with 2x2 resolution scaling, the soft limit will be 360 + "
    "96 MB, and with 3x3, it will be 360 + 216 MB.",
    "GPU");

namespace xe {
namespace gpu {

const TextureCache::LoadShaderInfo
    TextureCache::load_shader_info_[kLoadShaderCount] = {
        // k8bpb
        {3, 4, 1, 4},
        // k16bpb
        {4, 4, 2, 4},
        // k32bpb
        {4, 4, 4, 3},
        // k64bpb
        {4, 4, 8, 2},
        // k128bpb
        {4, 4, 16, 1},
        // kR5G5B5A1ToB5G5R5A1
        {4, 4, 2, 4},
        // kR5G6B5ToB5G6R5
        {4, 4, 2, 4},
        // kR5G5B6ToB5G6R5WithRBGASwizzle
        {4, 4, 2, 4},
        // kRGBA4ToBGRA4
        {4, 4, 2, 4},
        // kRGBA4ToARGB4
        {4, 4, 2, 4},
        // kGBGR8ToGRGB8
        {4, 4, 4, 3},
        // kGBGR8ToRGB8
        {4, 4, 8, 3},
        // kBGRG8ToRGBG8
        {4, 4, 4, 3},
        // kBGRG8ToRGB8
        {4, 4, 8, 3},
        // kR10G11B11ToRGBA16
        {4, 4, 8, 3},
        // kR10G11B11ToRGBA16SNorm
        {4, 4, 8, 3},
        // kR11G11B10ToRGBA16
        {4, 4, 8, 3},
        // kR11G11B10ToRGBA16SNorm
        {4, 4, 8, 3},
        // kR16UNormToFloat
        {4, 4, 2, 4},
        // kR16SNormToFloat
        {4, 4, 2, 4},
        // kRG16UNormToFloat
        {4, 4, 4, 3},
        // kRG16SNormToFloat
        {4, 4, 4, 3},
        // kRGBA16UNormToFloat
        {4, 4, 8, 2},
        // kRGBA16SNormToFloat
        {4, 4, 8, 2},
        // kDXT1ToRGBA8
        {4, 4, 4, 2},
        // kDXT3ToRGBA8
        {4, 4, 4, 1},
        // kDXT5ToRGBA8
        {4, 4, 4, 1},
        // kDXNToRG8
        {4, 4, 2, 1},
        // kDXT3A
        {4, 4, 1, 2},
        // kDXT3AAs1111ToBGRA4
        {4, 4, 2, 2},
        // kDXT3AAs1111ToARGB4
        {4, 4, 2, 2},
        // kDXT5AToR8
        {4, 4, 1, 2},
        // kCTX1
        {4, 4, 2, 2},
        // kDepthUnorm
        {4, 4, 4, 3},
        // kDepthFloat
        {4, 4, 4, 3},
};

TextureCache::TextureCache(const RegisterFile& register_file,
                           SharedMemory& shared_memory,
                           uint32_t draw_resolution_scale_x,
                           uint32_t draw_resolution_scale_y)
    : register_file_(register_file),
      shared_memory_(shared_memory),
      draw_resolution_scale_x_(draw_resolution_scale_x),
      draw_resolution_scale_y_(draw_resolution_scale_y),
      draw_resolution_scale_x_divisor_(draw_resolution_scale_x),
      draw_resolution_scale_y_divisor_(draw_resolution_scale_y) {
  assert_true(draw_resolution_scale_x >= 1);
  assert_true(draw_resolution_scale_x <= kMaxDrawResolutionScaleAlongAxis);
  assert_true(draw_resolution_scale_y >= 1);
  assert_true(draw_resolution_scale_y <= kMaxDrawResolutionScaleAlongAxis);

  if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
    constexpr uint32_t kScaledResolvePageDwordCount =
        SharedMemory::kBufferSize / 4096 / 32;
    scaled_resolve_pages_ = std::unique_ptr<uint32_t[]>(
        new uint32_t[kScaledResolvePageDwordCount]);
    std::memset(scaled_resolve_pages_.get(), 0,
                kScaledResolvePageDwordCount * sizeof(uint32_t));
    std::memset(scaled_resolve_pages_l2_, 0, sizeof(scaled_resolve_pages_l2_));
    scaled_resolve_global_watch_handle_ = shared_memory.RegisterGlobalWatch(
        ScaledResolveGlobalWatchCallbackThunk, this);
  }
}

TextureCache::~TextureCache() {
  DestroyAllTextures(true);
  if (scaled_resolve_global_watch_handle_) {
    shared_memory().UnregisterGlobalWatch(scaled_resolve_global_watch_handle_);
  }
}

bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
                                                uint32_t& y_out) {
  uint32_t config_x =
      uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
  uint32_t config_y =
      uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
  uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
  uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
  x_out = clamped_x;
  y_out = clamped_y;
  return clamped_x == config_x && clamped_y == config_y;
}

void TextureCache::ClearCache() { DestroyAllTextures(); }

void TextureCache::CompletedSubmissionUpdated(
    uint64_t completed_submission_index) {
  // If memory usage is too high, destroy unused textures.
  uint64_t current_time = xe::Clock::QueryHostUptimeMillis();
  // texture_cache_memory_limit_render_to_texture is assumed to be included in
  // texture_cache_memory_limit_soft and texture_cache_memory_limit_hard at
  // 1x1 scaling, so 1 is subtracted from the scale factor here.
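  // For example, with the default cvar values and 2x2 resolution scaling:
  //   limit_scaled_resolve_add_mb = 24 * (2 * 2 - 1) = 72
  //   limit_soft_mb = 384 + 72 = 456 (the assumed 360 MB of regular textures
  //   plus 4 * 24 = 96 MB of scaled resolve targets)
  //   limit_hard_mb = 768 + 72 = 840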
  uint32_t limit_scaled_resolve_add_mb =
      cvars::texture_cache_memory_limit_render_to_texture *
      (draw_resolution_scale_x() * draw_resolution_scale_y() - 1);
  uint32_t limit_soft_mb =
      cvars::texture_cache_memory_limit_soft + limit_scaled_resolve_add_mb;
  uint32_t limit_hard_mb =
      cvars::texture_cache_memory_limit_hard + limit_scaled_resolve_add_mb;
  uint32_t limit_soft_lifetime =
      cvars::texture_cache_memory_limit_soft_lifetime * 1000;
  bool destroyed_any = false;
  while (texture_used_first_ != nullptr) {
    uint64_t total_host_memory_usage_mb =
        (textures_total_host_memory_usage_ + ((UINT32_C(1) << 20) - 1)) >> 20;
    bool limit_hard_exceeded = total_host_memory_usage_mb > limit_hard_mb;
    if (total_host_memory_usage_mb <= limit_soft_mb && !limit_hard_exceeded) {
      break;
    }
    Texture* texture = texture_used_first_;
    if (texture->last_usage_submission_index() > completed_submission_index) {
      break;
    }
    if (!limit_hard_exceeded &&
        (texture->last_usage_time() + limit_soft_lifetime) > current_time) {
      break;
    }
    if (!destroyed_any) {
      destroyed_any = true;
      // The texture being destroyed might have been bound in the previous
      // submissions, and nothing has overwritten the binding yet, so
      // completion of the submission where the texture was last actually used
      // on the GPU doesn't imply that it's not bound currently. Reset the
      // bindings if any texture has been destroyed.
      ResetTextureBindings();
    }
    // Remove the texture from the map and destroy it via its unique_ptr.
    auto found_texture_it = textures_.find(texture->key());
    assert_true(found_texture_it != textures_.end());
    if (found_texture_it != textures_.end()) {
      assert_true(found_texture_it->second.get() == texture);
      textures_.erase(found_texture_it);
      // `texture` is invalid now.
    }
  }
  if (destroyed_any) {
    COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
  }
}

void TextureCache::BeginSubmission(uint64_t new_submission_index) {
  assert_true(new_submission_index > current_submission_index_);
  current_submission_index_ = new_submission_index;
  current_submission_time_ = xe::Clock::QueryHostUptimeMillis();
}

void TextureCache::BeginFrame() {
  // In case there was a failure to create something in the previous frame,
  // make sure the bindings are reset so a new attempt will be made if the
  // texture is requested again.
  ResetTextureBindings();
}

void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled,
                                       uint32_t length_unscaled) {
  if (length_unscaled == 0) {
    return;
  }
  start_unscaled &= 0x1FFFFFFF;
  length_unscaled = std::min(length_unscaled, 0x20000000 - start_unscaled);

  if (IsDrawResolutionScaled()) {
    uint32_t page_first = start_unscaled >> 12;
    uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
    uint32_t block_first = page_first >> 5;
    uint32_t block_last = page_last >> 5;
    auto global_lock = global_critical_region_.Acquire();
    for (uint32_t i = block_first; i <= block_last; ++i) {
      uint32_t add_bits = UINT32_MAX;
      if (i == block_first) {
        add_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
      }
      if (i == block_last && (page_last & 31) != 31) {
        add_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
      }
      scaled_resolve_pages_[i] |= add_bits;
      scaled_resolve_pages_l2_[i >> 6] |= UINT64_C(1) << (i & 63);
    }
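    // E.g. for an unscaled range covering pages 5...40: block 0 is ORed with
    // ~((1 << 5) - 1) = 0xFFFFFFE0 (pages 5...31), block 1 with
    // (1 << (8 + 1)) - 1 = 0x1FF (pages 32...40), and bits 0 and 1 of
    // scaled_resolve_pages_l2_[0] are set.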
  }
  // Invalidate textures. Toggling individual textures between scaled and
  // unscaled also relies on invalidation through shared memory.
  shared_memory().RangeWrittenByGpu(start_unscaled, length_unscaled, true);
}

uint32_t TextureCache::GuestToHostSwizzle(uint32_t guest_swizzle,
                                          uint32_t host_format_swizzle) {
  uint32_t host_swizzle = 0;
  for (uint32_t i = 0; i < 4; ++i) {
    uint32_t guest_swizzle_component = (guest_swizzle >> (3 * i)) & 0b111;
    uint32_t host_swizzle_component;
    if (guest_swizzle_component >= xenos::XE_GPU_TEXTURE_SWIZZLE_0) {
      // Get rid of 6 and 7 values (to prevent host GPU errors if the game has
      // something broken) the simple way - by changing them to 4 (0) and 5
      // (1).
      host_swizzle_component = guest_swizzle_component & 0b101;
    } else {
      host_swizzle_component =
          (host_format_swizzle >> (3 * guest_swizzle_component)) & 0b111;
    }
    host_swizzle |= host_swizzle_component << (3 * i);
  }
  return host_swizzle;
}

void TextureCache::RequestTextures(uint32_t used_texture_mask) {
  const auto& regs = register_file();

  if (texture_became_outdated_.exchange(false, std::memory_order_acquire)) {
    // A texture has become outdated - make sure whether textures are outdated
    // is rechecked in this draw and in subsequent ones to reload the new data
    // if needed.
    ResetTextureBindings();
  }

  // Update the texture keys and the textures.
  uint32_t bindings_changed = 0;
  uint32_t textures_remaining = used_texture_mask & ~texture_bindings_in_sync_;
  uint32_t index = 0;
  // Up to 32 fetch constants, each potentially requiring both an unsigned and
  // a signed host texture, so at most 64 textures to load.
  Texture* textures_to_load[64];
  uint32_t num_textures_to_load = 0;
  while (xe::bit_scan_forward(textures_remaining, &index)) {
    uint32_t index_bit = UINT32_C(1) << index;
    textures_remaining = xe::clear_lowest_bit(textures_remaining);
    TextureBinding& binding = texture_bindings_[index];
    xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
    TextureKey old_key = binding.key;
    uint8_t old_swizzled_signs = binding.swizzled_signs;
    BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);
    texture_bindings_in_sync_ |= index_bit;
    if (!binding.key.is_valid) {
      if (old_key.is_valid) {
        bindings_changed |= index_bit;
      }
      binding.Reset();
      continue;
    }
    uint32_t old_host_swizzle = binding.host_swizzle;
    binding.host_swizzle =
        GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(binding.key));

    // Check whether the unsigned and the signed versions of the texture need
    // to be loaded (if the format is emulated with different host bit
    // representations for signed and unsigned - otherwise only the unsigned
    // one is loaded).
    bool key_changed = binding.key != old_key;
    bool any_sign_was_not_signed =
        texture_util::IsAnySignNotSigned(old_swizzled_signs);
    bool any_sign_was_signed =
        texture_util::IsAnySignSigned(old_swizzled_signs);
    bool any_sign_is_not_signed =
        texture_util::IsAnySignNotSigned(binding.swizzled_signs);
    bool any_sign_is_signed =
        texture_util::IsAnySignSigned(binding.swizzled_signs);
    if (key_changed || binding.host_swizzle != old_host_swizzle ||
        any_sign_is_not_signed != any_sign_was_not_signed ||
        any_sign_is_signed != any_sign_was_signed) {
      bindings_changed |= index_bit;
    }
    bool load_unsigned_data = false, load_signed_data = false;
    if (IsSignedVersionSeparateForFormat(binding.key)) {
      // Can reuse previously loaded unsigned/signed versions if the key is the
      // same and the texture was previously bound as unsigned/signed
      // respectively (checking the previous values of signedness rather than
      // binding.texture != nullptr and binding.texture_signed != nullptr also
      // prevents repeated attempts to load the texture if it has failed to
      // load).
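      // The resulting behavior for binding.texture (the unsigned version;
      // binding.texture_signed is symmetrical):
      //   key_changed | was unsigned | is unsigned | action
      //   ------------+--------------+-------------+------------------------
      //       no      |     yes      |     yes     | keep (even if nullptr)
      //       no      |     no       |     yes     | FindOrCreateTexture+load
      //       yes     |     any      |     yes     | FindOrCreateTexture+load
      //       any     |     any      |     no      | reset to nullptr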
      if (any_sign_is_not_signed) {
        if (key_changed || !any_sign_was_not_signed) {
          binding.texture = FindOrCreateTexture(binding.key);
          load_unsigned_data = true;
        }
      } else {
        binding.texture = nullptr;
      }
      if (any_sign_is_signed) {
        if (key_changed || !any_sign_was_signed) {
          TextureKey signed_key = binding.key;
          signed_key.signed_separate = 1;
          binding.texture_signed = FindOrCreateTexture(signed_key);
          load_signed_data = true;
        }
      } else {
        binding.texture_signed = nullptr;
      }
    } else {
      // The same resource is used for both unsigned and signed, but the
      // descriptor formats may be different.
      if (key_changed) {
        binding.texture = FindOrCreateTexture(binding.key);
        load_unsigned_data = true;
      }
      binding.texture_signed = nullptr;
    }
    if (load_unsigned_data && binding.texture != nullptr) {
      textures_to_load[num_textures_to_load++] = binding.texture;
    }
    if (load_signed_data && binding.texture_signed != nullptr) {
      textures_to_load[num_textures_to_load++] = binding.texture_signed;
    }
  }
  LoadTexturesData(textures_to_load, num_textures_to_load);

  if (bindings_changed) {
    UpdateTextureBindingsImpl(bindings_changed);
  }
}

const char* TextureCache::TextureKey::GetLogDimensionName(
    xenos::DataDimension dimension) {
  switch (dimension) {
    case xenos::DataDimension::k1D:
      return "1D";
    case xenos::DataDimension::k2DOrStacked:
      return "2D";
    case xenos::DataDimension::k3D:
      return "3D";
    case xenos::DataDimension::kCube:
      return "cube";
    default:
      assert_unhandled_case(dimension);
      return "unknown";
  }
}

void TextureCache::TextureKey::LogAction(const char* action) const {
  XELOGGPU(
      "{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
      "base at 0x{:08X} (pitch {}), mips at 0x{:08X}",
      action, tiled ? "tiled" : "linear", scaled_resolve ? "scaled " : "",
      GetWidth(), GetHeight(), GetDepthOrArraySize(), GetLogDimensionName(),
      FormatInfo::GetName(format), mip_max_level + 1, packed_mips ? "" : "un",
      mip_max_level != 0 ? "s" : "", base_page << 12, pitch << 5,
      mip_page << 12);
}

void TextureCache::Texture::LogAction(const char* action) const {
  XELOGGPU(
      "{} {} {}{}x{}x{} {} {} texture with {} {}packed mip level{}, "
      "base at 0x{:08X} (pitch {}, size 0x{:08X}), mips at 0x{:08X} (size "
      "0x{:08X})",
      action, key_.tiled ? "tiled" : "linear",
      key_.scaled_resolve ? "scaled " : "", key_.GetWidth(), key_.GetHeight(),
      key_.GetDepthOrArraySize(), key_.GetLogDimensionName(),
      FormatInfo::GetName(key_.format), key_.mip_max_level + 1,
      key_.packed_mips ? "" : "un", key_.mip_max_level != 0 ? "s" : "",
      key_.base_page << 12, key_.pitch << 5, GetGuestBaseSize(),
      key_.mip_page << 12, GetGuestMipsSize());
}

// The texture must be in the recent usage list. Place it in the front now
// because after creation, the texture will likely be used immediately, and it
// should not be destroyed immediately after creation if dropping of old
// textures is performed somehow. The list is maintained by the Texture, not
// the TextureCache itself (unlike the `textures_` container).
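// A sketch of the resulting order, assuming textures created as A, B, C and
// then A marked as used again in a later submission:
//   after creation:       first = A <-> B <-> C = last
//   after A.MarkAsUsed(): first = B <-> C <-> A = last
// (first = texture_used_first_, last = texture_used_last_.)
// CompletedSubmissionUpdated walks from texture_used_first_, so the least
// recently used textures are destroyed first.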
TextureCache::Texture::Texture(TextureCache& texture_cache,
                               const TextureKey& key)
    : texture_cache_(texture_cache),
      key_(key),
      guest_layout_(key.GetGuestLayout()),
      base_resolved_(key.scaled_resolve),
      mips_resolved_(key.scaled_resolve),
      last_usage_submission_index_(texture_cache.current_submission_index_),
      last_usage_time_(texture_cache.current_submission_time_),
      used_previous_(texture_cache.texture_used_last_),
      used_next_(nullptr) {
  if (texture_cache.texture_used_last_) {
    texture_cache.texture_used_last_->used_next_ = this;
  } else {
    texture_cache.texture_used_first_ = this;
  }
  texture_cache.texture_used_last_ = this;

  // Never try to upload data that doesn't exist.
  base_outdated_ = guest_layout().base.level_data_extent_bytes != 0;
  mips_outdated_ = guest_layout().mips_total_extent_bytes != 0;
}

TextureCache::Texture::~Texture() {
  if (mips_watch_handle_) {
    texture_cache().shared_memory().UnwatchMemoryRange(mips_watch_handle_);
  }
  if (base_watch_handle_) {
    texture_cache().shared_memory().UnwatchMemoryRange(base_watch_handle_);
  }

  if (used_previous_) {
    used_previous_->used_next_ = used_next_;
  } else {
    texture_cache_.texture_used_first_ = used_next_;
  }
  if (used_next_) {
    used_next_->used_previous_ = used_previous_;
  } else {
    texture_cache_.texture_used_last_ = used_previous_;
  }

  texture_cache_.UpdateTexturesTotalHostMemoryUsage(0, host_memory_usage_);
}

void TextureCache::Texture::MakeUpToDateAndWatch(
    const global_unique_lock_type& global_lock) {
  SharedMemory& shared_memory = texture_cache().shared_memory();
  if (base_outdated_) {
    assert_not_zero(GetGuestBaseSize());
    base_outdated_ = false;
    base_watch_handle_ = shared_memory.WatchMemoryRange(
        key().base_page << 12, GetGuestBaseSize(), TextureCache::WatchCallback,
        this, nullptr, 0);
  }
  if (mips_outdated_) {
    assert_not_zero(GetGuestMipsSize());
    mips_outdated_ = false;
    mips_watch_handle_ = shared_memory.WatchMemoryRange(
        key().mip_page << 12, GetGuestMipsSize(), TextureCache::WatchCallback,
        this, nullptr, 1);
  }
}

void TextureCache::Texture::MarkAsUsed() {
  assert_true(last_usage_submission_index_ <=
              texture_cache_.current_submission_index_);
  // This is called very frequently, don't relink unless needed.
  if (last_usage_submission_index_ >=
      texture_cache_.current_submission_index_) {
    return;
  }
  last_usage_submission_index_ = texture_cache_.current_submission_index_;
  last_usage_time_ = texture_cache_.current_submission_time_;
  if (used_next_ == nullptr) {
    // Already the most recently used.
    return;
  }
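  // Unlink from the current position (the predecessor, or the list head if
  // this was the first texture, inherits used_next_), then append at the
  // tail. E.g. for first = A <-> B <-> C = last and B.MarkAsUsed():
  //   A->used_next_ = C, C->used_previous_ = A,
  //   then B->used_previous_ = C, C->used_next_ = B, last = B.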
  if (used_previous_ != nullptr) {
    used_previous_->used_next_ = used_next_;
  } else {
    texture_cache_.texture_used_first_ = used_next_;
  }
  used_next_->used_previous_ = used_previous_;
  used_previous_ = texture_cache_.texture_used_last_;
  used_next_ = nullptr;
  texture_cache_.texture_used_last_->used_next_ = this;
  texture_cache_.texture_used_last_ = this;
}

void TextureCache::Texture::WatchCallback(
    [[maybe_unused]] const global_unique_lock_type& global_lock, bool is_mip) {
  if (is_mip) {
    assert_not_zero(GetGuestMipsSize());
    mips_outdated_ = true;
    mips_watch_handle_ = nullptr;
  } else {
    assert_not_zero(GetGuestBaseSize());
    base_outdated_ = true;
    base_watch_handle_ = nullptr;
  }
}

void TextureCache::WatchCallback(const global_unique_lock_type& global_lock,
                                 void* context, void* data, uint64_t argument,
                                 bool invalidated_by_gpu) {
  Texture& texture = *static_cast<Texture*>(context);
  texture.WatchCallback(global_lock, argument != 0);
  texture.texture_cache().texture_became_outdated_.store(
      true, std::memory_order_release);
}

void TextureCache::DestroyAllTextures(bool from_destructor) {
  ResetTextureBindings(from_destructor);
  textures_.clear();
  COUNT_profile_set("gpu/texture_cache/textures", 0);
}

TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
  // Check if the texture is a scaled resolve texture.
  if (IsDrawResolutionScaled() && key.tiled &&
      IsScaledResolveSupportedForFormat(key)) {
    texture_util::TextureGuestLayout scaled_resolve_guest_layout =
        key.GetGuestLayout();
    if ((scaled_resolve_guest_layout.base.level_data_extent_bytes &&
         IsRangeScaledResolved(
             key.base_page << 12,
             scaled_resolve_guest_layout.base.level_data_extent_bytes)) ||
        (scaled_resolve_guest_layout.mips_total_extent_bytes &&
         IsRangeScaledResolved(
             key.mip_page << 12,
             scaled_resolve_guest_layout.mips_total_extent_bytes))) {
      key.scaled_resolve = 1;
    }
  }

  uint32_t host_width = key.GetWidth();
  uint32_t host_height = key.GetHeight();
  if (key.scaled_resolve) {
    host_width *= draw_resolution_scale_x();
    host_height *= draw_resolution_scale_y();
  }
  // With 3x resolution scaling, a 2D texture may become bigger than the
  // Direct3D 11 limit, and with 2x, a 3D one as well.
  // TODO(Triang3l): Skip mips on Vulkan in this case - the minimum
  // requirement there is 4096, which is below the Xenos maximum texture size
  // of 8192.
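  // For instance, a maximum-size 8192x8192 Xenos 2D texture at 3x3 scaling
  // would need a 24576x24576 host texture, above the Direct3D 11 limit of
  // 16384 (D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION); such textures are rejected
  // below rather than created partially.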
  uint32_t max_host_width_height = GetMaxHostTextureWidthHeight(key.dimension);
  uint32_t max_host_depth_or_array_size =
      GetMaxHostTextureDepthOrArraySize(key.dimension);
  if (host_width > max_host_width_height ||
      host_height > max_host_width_height ||
      key.GetDepthOrArraySize() > max_host_depth_or_array_size) {
    return nullptr;
  }

  // Try to find an existing texture.
  // TODO(Triang3l): Reuse a texture with mip_page unchanged, but base_page
  // previously 0, now not 0, to save memory - a common case in streaming.
  auto found_texture_it = textures_.find(key);
  if (found_texture_it != textures_.end()) {
    return found_texture_it->second.get();
  }

  // Create the texture and add it to the map.
  Texture* texture;
  {
    std::unique_ptr<Texture> new_texture = CreateTexture(key);
    if (!new_texture) {
      key.LogAction("Failed to create");
      return nullptr;
    }
    assert_true(new_texture->key() == key);
    texture =
        textures_.emplace(key, std::move(new_texture)).first->second.get();
  }
  COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
  texture->LogAction("Created");
  return texture;
}

void TextureCache::LoadTexturesData(Texture** textures, uint32_t n_textures) {
  assert_true(n_textures <= 64);
  if (n_textures < 2) {
    if (!n_textures) {
      return;
    }
    LoadTextureData(*textures[0]);
    return;
  }
  uint64_t index_base_outdated = 0;
  uint64_t index_mips_outdated = 0;
  uint32_t nkept = 0;
  {
    auto global_lock = global_critical_region_.Acquire();
    for (uint32_t i = 0; i < n_textures; ++i) {
      Texture* current = textures[i];
      bool base_outdated = current->base_outdated(global_lock);
      bool mips_outdated = current->mips_outdated(global_lock);
      index_base_outdated |= static_cast<uint64_t>(base_outdated) << i;
      index_mips_outdated |= static_cast<uint64_t>(mips_outdated) << i;
      if (!base_outdated && !mips_outdated) {
        textures[i] = nullptr;
      } else {
        nkept++;
      }
    }
  }
  if (nkept == 0) {
    return;
  }
  for (uint32_t i = 0; i < n_textures; ++i) {
    Texture* p_texture = textures[i];
    if (!p_texture) {
      continue;
    }
    textures[i] = nullptr;
    Texture& texture = *p_texture;
    TextureKey texture_key = texture.key();
    // The implementation may load multiple blocks at once via accesses of up
    // to 128 bits (R32G32B32A32_UINT), so the size is aligned to that value
    // to make sure that, if the texture is small (especially if it's linear),
    // the last blocks won't be cut off (hosts may return 0, 0, 0, 0 for the
    // whole R32G32B32A32_UINT access for the non-16-aligned tail even if
    // 1...15 bytes are actually provided for it).
    // Request uploading of the texture data to the shared memory.
    // This is also necessary when resolution scaling is used - the texture
    // cache relies on shared memory for invalidation of both unscaled and
    // scaled textures. Plus, a texture may be unscaled partially, when only a
    // portion of its pages is invalidated - in this case, the texture from
    // the shared memory will be needed to load the unscaled parts.
    // TODO(Triang3l): Load unscaled parts.
    bool base_resolved = texture.GetBaseResolved();
    if (index_base_outdated & (UINT64_C(1) << i)) {
      if (!shared_memory().RequestRange(
              texture_key.base_page << 12,
              xe::align(texture.GetGuestBaseSize(), UINT32_C(16)),
              texture_key.scaled_resolve ? nullptr : &base_resolved)) {
        continue;
      }
    }
    bool mips_resolved = texture.GetMipsResolved();
    if (index_mips_outdated & (UINT64_C(1) << i)) {
      if (!shared_memory().RequestRange(
              texture_key.mip_page << 12,
              xe::align(texture.GetGuestMipsSize(), UINT32_C(16)),
              texture_key.scaled_resolve ? nullptr : &mips_resolved)) {
        continue;
      }
    }
    if (texture_key.scaled_resolve) {
      // Make sure all the scaled resolve memory is resident and accessible
      // from the shader, including any possible padding that hasn't yet been
      // touched by an actual resolve, but is still included in the texture
      // size, so the GPU won't be trying to access unmapped memory.
      if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12,
                                              texture.GetGuestBaseSize(), 4)) {
        continue;
      }
      if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12,
                                              texture.GetGuestMipsSize(), 4)) {
        continue;
      }
    }
    // Actually load the texture data.
    if (!LoadTextureDataFromResidentMemoryImpl(
            texture, (index_base_outdated & (UINT64_C(1) << i)) != 0,
            (index_mips_outdated & (UINT64_C(1) << i)) != 0)) {
      continue;
    }
    // Update the source of the texture (resolve vs. CPU or memexport) for
    // purposes of handling piecewise gamma emulation via sRGB and for
    // resolution scale in sampling offsets.
    if (!texture_key.scaled_resolve) {
      texture.SetBaseResolved(base_resolved);
      texture.SetMipsResolved(mips_resolved);
    }
    // Requeue for MakeUpToDateAndWatch and logging under the global lock
    // below.
    textures[i] = &texture;
  }
  {
    auto global_lock = global_critical_region_.Acquire();
    for (uint32_t i = 0; i < n_textures; ++i) {
      Texture* texture = textures[i];
      if (!texture) {
        continue;
      }
      // Mark the ranges as uploaded and watch them. This is needed for scaled
      // resolves as well to detect when the CPU wants to reuse the memory for
      // a regular texture or a vertex buffer, and thus the scaled resolve
      // version is not up to date anymore.
      texture->MakeUpToDateAndWatch(global_lock);
      texture->LogAction("Loaded");
    }
  }
}

bool TextureCache::LoadTextureData(Texture& texture) {
  // Check what needs to be uploaded.
  bool base_outdated, mips_outdated;
  {
    auto global_lock = global_critical_region_.Acquire();
    base_outdated = texture.base_outdated(global_lock);
    mips_outdated = texture.mips_outdated(global_lock);
  }
  if (!base_outdated && !mips_outdated) {
    return true;
  }

  TextureKey texture_key = texture.key();

  // The implementation may load multiple blocks at once via accesses of up to
  // 128 bits (R32G32B32A32_UINT), so the size is aligned to that value to
  // make sure that, if the texture is small (especially if it's linear), the
  // last blocks won't be cut off (hosts may return 0, 0, 0, 0 for the whole
  // R32G32B32A32_UINT access for the non-16-aligned tail even if 1...15 bytes
  // are actually provided for it).
  // Request uploading of the texture data to the shared memory.
  // This is also necessary when resolution scaling is used - the texture
  // cache relies on shared memory for invalidation of both unscaled and
  // scaled textures. Plus, a texture may be unscaled partially, when only a
  // portion of its pages is invalidated - in this case, the texture from the
  // shared memory will be needed to load the unscaled parts.
  // TODO(Triang3l): Load unscaled parts.
  bool base_resolved = texture.GetBaseResolved();
  if (base_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.base_page << 12,
            xe::align(texture.GetGuestBaseSize(), UINT32_C(16)),
            texture_key.scaled_resolve ? nullptr : &base_resolved)) {
      return false;
    }
  }
  bool mips_resolved = texture.GetMipsResolved();
  if (mips_outdated) {
    if (!shared_memory().RequestRange(
            texture_key.mip_page << 12,
            xe::align(texture.GetGuestMipsSize(), UINT32_C(16)),
            texture_key.scaled_resolve ? nullptr : &mips_resolved)) {
      return false;
    }
  }
  if (texture_key.scaled_resolve) {
    // Make sure all the scaled resolve memory is resident and accessible from
    // the shader, including any possible padding that hasn't yet been touched
    // by an actual resolve, but is still included in the texture size, so the
    // GPU won't be trying to access unmapped memory.
    if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12,
                                            texture.GetGuestBaseSize(), 4)) {
      return false;
    }
    if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12,
                                            texture.GetGuestMipsSize(), 4)) {
      return false;
    }
  }

  // Actually load the texture data.
  if (!LoadTextureDataFromResidentMemoryImpl(texture, base_outdated,
                                             mips_outdated)) {
    return false;
  }

  // Update the source of the texture (resolve vs. CPU or memexport) for
  // purposes of handling piecewise gamma emulation via sRGB and for
  // resolution scale in sampling offsets.
  if (!texture_key.scaled_resolve) {
    texture.SetBaseResolved(base_resolved);
    texture.SetMipsResolved(mips_resolved);
  }

  // Mark the ranges as uploaded and watch them. This is needed for scaled
  // resolves as well to detect when the CPU wants to reuse the memory for a
  // regular texture or a vertex buffer, and thus the scaled resolve version
  // is not up to date anymore.
  texture.MakeUpToDateAndWatch(global_critical_region_.Acquire());

  texture.LogAction("Loaded");
  return true;
}

void TextureCache::BindingInfoFromFetchConstant(
    const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
    uint8_t* swizzled_signs_out) {
  // Reset the key and the signedness.
  key_out.MakeInvalid();
  if (swizzled_signs_out != nullptr) {
    *swizzled_signs_out =
        uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101);
  }

  switch (fetch.type) {
    case xenos::FetchConstantType::kTexture:
      break;
    case xenos::FetchConstantType::kInvalidTexture:
      if (cvars::gpu_allow_invalid_fetch_constants) {
        break;
      }
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "has \"invalid\" type! This is incorrect behavior, but you can try "
          "bypassing this by launching Xenia with "
          "--gpu_allow_invalid_fetch_constants=true.",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
    default:
      XELOGW(
          "Texture fetch constant ({:08X} {:08X} {:08X} {:08X} {:08X} {:08X}) "
          "is completely invalid!",
          fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3,
          fetch.dword_4, fetch.dword_5);
      return;
  }

  uint32_t width_minus_1, height_minus_1, depth_or_array_size_minus_1;
  uint32_t base_page, mip_page, mip_max_level;
  texture_util::GetSubresourcesFromFetchConstant(
      fetch, &width_minus_1, &height_minus_1, &depth_or_array_size_minus_1,
      &base_page, &mip_page, nullptr, &mip_max_level);
  if (base_page == 0 && mip_page == 0) {
    // No texture data at all.
    return;
  }
  if (fetch.dimension == xenos::DataDimension::k1D) {
    bool is_invalid_1d = false;
    // TODO(Triang3l): Support long 1D textures.
    if (width_minus_1 >= xenos::kTexture2DCubeMaxWidthHeight) {
      XELOGE(
          "1D texture is too wide ({}) - ignoring! Report the game to Xenia "
          "developers",
          width_minus_1 + 1);
      is_invalid_1d = true;
    }
    assert_false(fetch.tiled);
    if (fetch.tiled) {
      XELOGE(
          "1D texture has tiling enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to "
          "Xenia developers");
      is_invalid_1d = true;
    }
    assert_false(fetch.packed_mips);
    if (fetch.packed_mips) {
      XELOGE(
          "1D texture has packed mips enabled in the fetch constant, but this "
          "appears to be completely wrong - ignoring! Report the game to "
          "Xenia developers");
      is_invalid_1d = true;
    }
    if (is_invalid_1d) {
      return;
    }
  }

  xenos::TextureFormat format = GetBaseFormat(fetch.format);

  key_out.base_page = base_page;
  key_out.mip_page = mip_page;
  key_out.dimension = fetch.dimension;
  key_out.width_minus_1 = width_minus_1;
  key_out.height_minus_1 = height_minus_1;
  key_out.depth_or_array_size_minus_1 = depth_or_array_size_minus_1;
  key_out.pitch = fetch.pitch;
  key_out.mip_max_level = mip_max_level;
  key_out.tiled = fetch.tiled;
  key_out.packed_mips = fetch.packed_mips;
  key_out.format = format;
  key_out.endianness = fetch.endianness;
  key_out.is_valid = 1;

  if (swizzled_signs_out != nullptr) {
    *swizzled_signs_out = texture_util::SwizzleSigns(fetch);
  }
}

void TextureCache::ResetTextureBindings(bool from_destructor) {
  uint32_t bindings_reset = 0;
  for (size_t i = 0; i < texture_bindings_.size(); ++i) {
    TextureBinding& binding = texture_bindings_[i];
    if (!binding.key.is_valid) {
      continue;
    }
    binding.Reset();
    bindings_reset |= UINT32_C(1) << i;
  }
  texture_bindings_in_sync_ &= ~bindings_reset;
  if (!from_destructor && bindings_reset) {
    UpdateTextureBindingsImpl(bindings_reset);
  }
}

void TextureCache::UpdateTexturesTotalHostMemoryUsage(uint64_t add,
                                                      uint64_t subtract) {
  textures_total_host_memory_usage_ =
      textures_total_host_memory_usage_ - subtract + add;
  COUNT_profile_set("gpu/texture_cache/total_host_memory_usage_mb",
                    uint32_t((textures_total_host_memory_usage_ +
                              ((UINT32_C(1) << 20) - 1)) >>
                             20));
}

bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
                                         uint32_t length_unscaled) {
  if (!IsDrawResolutionScaled()) {
    return false;
  }

  start_unscaled = std::min(start_unscaled, SharedMemory::kBufferSize);
  length_unscaled =
      std::min(length_unscaled, SharedMemory::kBufferSize - start_unscaled);
  if (!length_unscaled) {
    return false;
  }

  // Two-level check for faster rejection since resolve targets are usually
  // placed in relatively small and localized memory portions (confirmed by
  // testing - pretty much every time the deeper level was entered, the
  // texture was a resolve target).
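  // The lower level is scaled_resolve_pages_, with one bit per 4 KB page (one
  // uint32_t per 128 KB); the upper level is scaled_resolve_pages_l2_, with
  // one bit per uint32_t of the lower level. For the 512 MB address space,
  // that's 4096 uint32_t at the lower level and 64 uint64_t at the upper one,
  // so a fully empty region is rejected by scanning at most 64 qwords.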
  uint32_t page_first = start_unscaled >> 12;
  uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
  uint32_t block_first = page_first >> 5;
  uint32_t block_last = page_last >> 5;
  uint32_t l2_block_first = block_first >> 6;
  uint32_t l2_block_last = block_last >> 6;
  auto global_lock = global_critical_region_.Acquire();
  for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
    uint64_t l2_block = scaled_resolve_pages_l2_[i];
    if (i == l2_block_first) {
      l2_block &= ~((UINT64_C(1) << (block_first & 63)) - 1);
    }
    if (i == l2_block_last && (block_last & 63) != 63) {
      l2_block &= (UINT64_C(1) << ((block_last & 63) + 1)) - 1;
    }
    uint32_t block_relative_index;
    while (xe::bit_scan_forward(l2_block, &block_relative_index)) {
      l2_block &= ~(UINT64_C(1) << block_relative_index);
      uint32_t block_index = (i << 6) + block_relative_index;
      uint32_t check_bits = UINT32_MAX;
      if (block_index == block_first) {
        check_bits &= ~((UINT32_C(1) << (page_first & 31)) - 1);
      }
      if (block_index == block_last && (page_last & 31) != 31) {
        check_bits &= (UINT32_C(1) << ((page_last & 31) + 1)) - 1;
      }
      if (scaled_resolve_pages_[block_index] & check_bits) {
        return true;
      }
    }
  }
  return false;
}

void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
    const global_unique_lock_type& global_lock, void* context,
    uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
  TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
  texture_cache->ScaledResolveGlobalWatchCallback(
      global_lock, address_first, address_last, invalidated_by_gpu);
}

void TextureCache::ScaledResolveGlobalWatchCallback(
    const global_unique_lock_type& global_lock, uint32_t address_first,
    uint32_t address_last, bool invalidated_by_gpu) {
  assert_true(IsDrawResolutionScaled());
  if (invalidated_by_gpu) {
    // Resolves themselves do exactly the opposite of what this callback
    // should do.
    return;
  }
  // Mark scaled resolve ranges as non-scaled. The textures themselves will be
  // invalidated by their shared memory watches.
  uint32_t resolve_page_first = address_first >> 12;
  uint32_t resolve_page_last = address_last >> 12;
  uint32_t resolve_block_first = resolve_page_first >> 5;
  uint32_t resolve_block_last = resolve_page_last >> 5;
  uint32_t resolve_l2_block_first = resolve_block_first >> 6;
  uint32_t resolve_l2_block_last = resolve_block_last >> 6;
  for (uint32_t i = resolve_l2_block_first; i <= resolve_l2_block_last; ++i) {
    uint64_t resolve_l2_block = scaled_resolve_pages_l2_[i];
    uint32_t resolve_block_relative_index;
    while (xe::bit_scan_forward(resolve_l2_block,
                                &resolve_block_relative_index)) {
      resolve_l2_block &= ~(UINT64_C(1) << resolve_block_relative_index);
      uint32_t resolve_block_index = (i << 6) + resolve_block_relative_index;
      uint32_t resolve_keep_bits = 0;
      if (resolve_block_index == resolve_block_first) {
        resolve_keep_bits |= (UINT32_C(1) << (resolve_page_first & 31)) - 1;
      }
      if (resolve_block_index == resolve_block_last &&
          (resolve_page_last & 31) != 31) {
        resolve_keep_bits |=
            ~((UINT32_C(1) << ((resolve_page_last & 31) + 1)) - 1);
      }
      scaled_resolve_pages_[resolve_block_index] &= resolve_keep_bits;
      if (scaled_resolve_pages_[resolve_block_index] == 0) {
        scaled_resolve_pages_l2_[i] &=
            ~(UINT64_C(1) << resolve_block_relative_index);
      }
    }
  }
}

}  // namespace gpu
}  // namespace xe