Merge pull request #7925 from stenzek/xfb-stride

TextureCache: Simplify XFB reconstruction
This commit is contained in:
JMC47 2019-04-20 23:21:42 -04:00 committed by GitHub
commit 18589e50ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 311 additions and 495 deletions

View File

@ -686,6 +686,9 @@ static void BeginField(FieldType field, u64 ticks)
xfbAddr = GetXFBAddressTop(); xfbAddr = GetXFBAddressTop();
} }
// Multiply the stride by 2 to get the byte offset for each subsequent line.
fbStride *= 2;
if (potentially_interlaced_xfb && interlaced_video_mode && g_ActiveConfig.bForceProgressive) if (potentially_interlaced_xfb && interlaced_video_mode && g_ActiveConfig.bForceProgressive)
{ {
// Strictly speaking, in interlaced mode, we're only supposed to read // Strictly speaking, in interlaced mode, we're only supposed to read
@ -704,10 +707,10 @@ static void BeginField(FieldType field, u64 ticks)
// offset the xfb by (-stride_of_one_line) to get the start // offset the xfb by (-stride_of_one_line) to get the start
// address of the full xfb. // address of the full xfb.
if (field == FieldType::Odd && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB + 1 && xfbAddr) if (field == FieldType::Odd && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB + 1 && xfbAddr)
xfbAddr -= fbStride * 2; xfbAddr -= fbStride;
if (field == FieldType::Even && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB - 1 && xfbAddr) if (field == FieldType::Even && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB - 1 && xfbAddr)
xfbAddr -= fbStride * 2; xfbAddr -= fbStride;
} }
LogField(field, xfbAddr); LogField(field, xfbAddr);
@ -787,10 +790,8 @@ void Update(u64 ticks)
} }
// Create a fake VI mode for a fifolog // Create a fake VI mode for a fifolog
void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height) void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height)
{ {
u32 fb_stride = fb_width;
bool interlaced = fb_height > 480 / 2; bool interlaced = fb_height > 480 / 2;
if (interlaced) if (interlaced)
{ {
@ -807,7 +808,7 @@ void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height)
m_VBlankTimingEven.PRB = 503 - fb_height * 2; m_VBlankTimingEven.PRB = 503 - fb_height * 2;
m_VBlankTimingEven.PSB = 4; m_VBlankTimingEven.PSB = 4;
m_PictureConfiguration.WPL = fb_width / 16; m_PictureConfiguration.WPL = fb_width / 16;
m_PictureConfiguration.STD = fb_stride / 16; m_PictureConfiguration.STD = (fb_stride / 2) / 16;
UpdateParameters(); UpdateParameters();

View File

@ -374,6 +374,6 @@ u32 GetTicksPerField();
float GetAspectRatio(); float GetAspectRatio();
// Create a fake VI mode for a fifolog // Create a fake VI mode for a fifolog
void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height); void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height);
} // namespace VideoInterface } // namespace VideoInterface

View File

@ -22,14 +22,14 @@ public:
protected: protected:
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top,
const EFBCopyFilterCoefficients& filter_coefficients) override bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients) override
{ {
} }
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format,
float gamma, bool clamp_top, bool clamp_bottom, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const EFBCopyFilterCoefficients& filter_coefficients) override
{ {
} }

View File

@ -12,15 +12,15 @@ class TextureCache : public TextureCacheBase
protected: protected:
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top,
const EFBCopyFilterCoefficients& filter_coefficients) override bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients) override
{ {
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma); src_rect, scale_by_half, y_scale, gamma);
} }
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format,
float gamma, bool clamp_top, bool clamp_bottom, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const EFBCopyFilterCoefficients& filter_coefficients) override
{ {
// TODO: If we ever want to "fake" vram textures, we would need to implement this // TODO: If we ever want to "fake" vram textures, we would need to implement this

View File

@ -113,7 +113,6 @@ void AsyncRequests::SetEnable(bool enable)
void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
{ {
EFBRectangle rc;
switch (e.type) switch (e.type)
{ {
case Event::EFB_POKE_COLOR: case Event::EFB_POKE_COLOR:
@ -145,7 +144,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
case Event::SWAP_EVENT: case Event::SWAP_EVENT:
g_renderer->Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride, g_renderer->Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride,
e.swap_event.fbHeight, rc, e.time); e.swap_event.fbHeight, e.time);
break; break;
case Event::BBOX_READ: case Event::BBOX_READ:

View File

@ -312,14 +312,13 @@ static void BPWritten(const BPCmd& bp)
if (g_ActiveConfig.bImmediateXFB) if (g_ActiveConfig.bImmediateXFB)
{ {
// below div two to convert from bytes to pixels - it expects width, not stride // below div two to convert from bytes to pixels - it expects width, not stride
g_renderer->Swap(destAddr, destStride / 2, destStride / 2, height, srcRect, g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks());
CoreTiming::GetTicks());
} }
else else
{ {
if (FifoPlayer::GetInstance().IsRunningWithFakeVideoInterfaceUpdates()) if (FifoPlayer::GetInstance().IsRunningWithFakeVideoInterfaceUpdates())
{ {
VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), height); VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), destStride, height);
} }
} }
} }

View File

@ -1154,8 +1154,7 @@ void Renderer::EndUIFrame()
BeginImGuiFrame(); BeginImGuiFrame();
} }
void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks)
u64 ticks)
{ {
const AspectMode suggested = g_ActiveConfig.suggested_aspect_mode; const AspectMode suggested = g_ActiveConfig.suggested_aspect_mode;
if (suggested == AspectMode::Analog || suggested == AspectMode::AnalogWide) if (suggested == AspectMode::Analog || suggested == AspectMode::AnalogWide)
@ -1188,33 +1187,15 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// behind the renderer. // behind the renderer.
FlushFrameDump(); FlushFrameDump();
if (xfbAddr && fbWidth && fbStride && fbHeight) if (xfb_addr && fb_width && fb_stride && fb_height)
{ {
constexpr int force_safe_texture_cache_hash = 0;
// Get the current XFB from texture cache // Get the current XFB from texture cache
auto* xfb_entry = g_texture_cache->GetXFBTexture( MathUtil::Rectangle<int> xfb_rect;
xfbAddr, fbStride, fbHeight, TextureFormat::XFB, force_safe_texture_cache_hash); const auto* xfb_entry =
g_texture_cache->GetXFBTexture(xfb_addr, fb_width, fb_height, fb_stride, &xfb_rect);
if (xfb_entry && xfb_entry->id != m_last_xfb_id) if (xfb_entry && xfb_entry->id != m_last_xfb_id)
{ {
const TextureConfig& texture_config = xfb_entry->texture->GetConfig();
m_last_xfb_texture = xfb_entry->texture.get();
m_last_xfb_id = xfb_entry->id; m_last_xfb_id = xfb_entry->id;
m_last_xfb_ticks = ticks;
auto xfb_rect = texture_config.GetRect();
// It's possible that the returned XFB texture is native resolution
// even when we're rendering at higher than native resolution
// if the XFB was was loaded entirely from console memory.
// If so, adjust the rectangle by native resolution instead of scaled resolution.
const u32 native_stride_width_difference = fbStride - fbWidth;
if (texture_config.width == xfb_entry->native_width)
xfb_rect.right -= native_stride_width_difference;
else
xfb_rect.right -= EFBToScaledX(native_stride_width_difference);
m_last_xfb_region = xfb_rect;
// Since we use the common pipelines here and draw vertices if a batch is currently being // Since we use the common pipelines here and draw vertices if a batch is currently being
// built by the vertex loader, we end up trampling over its pointer, as we share the buffer // built by the vertex loader, we end up trampling over its pointer, as we share the buffer
@ -1247,7 +1228,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// Update the window size based on the frame that was just rendered. // Update the window size based on the frame that was just rendered.
// Due to depending on guest state, we need to call this every frame. // Due to depending on guest state, we need to call this every frame.
SetWindowSize(texture_config.width, texture_config.height); SetWindowSize(xfb_rect.GetWidth(), xfb_rect.GetHeight());
} }
m_fps_counter.Update(); m_fps_counter.Update();
@ -1259,7 +1240,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
DolphinAnalytics::Instance()->ReportPerformanceInfo(std::move(perf_sample)); DolphinAnalytics::Instance()->ReportPerformanceInfo(std::move(perf_sample));
if (IsFrameDumping()) if (IsFrameDumping())
DumpCurrentFrame(); DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks);
// Begin new frame // Begin new frame
m_frame_count++; m_frame_count++;
@ -1295,8 +1276,8 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
} }
// Update our last xfb values // Update our last xfb values
m_last_xfb_width = (fbStride < 1 || fbStride > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fbStride; m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width;
m_last_xfb_height = (fbHeight < 1 || fbHeight > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fbHeight; m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height;
} }
else else
{ {
@ -1333,8 +1314,11 @@ bool Renderer::IsFrameDumping()
return false; return false;
} }
void Renderer::DumpCurrentFrame() void Renderer::DumpCurrentFrame(const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect, u64 ticks)
{ {
int source_width = src_rect.GetWidth();
int source_height = src_rect.GetHeight();
int target_width, target_height; int target_width, target_height;
if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless()) if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless())
{ {
@ -1344,22 +1328,20 @@ void Renderer::DumpCurrentFrame()
} }
else else
{ {
std::tie(target_width, target_height) = CalculateOutputDimensions( std::tie(target_width, target_height) = CalculateOutputDimensions(source_width, source_height);
m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height);
} }
// We only need to render a copy if we need to stretch/scale the XFB copy. // We only need to render a copy if we need to stretch/scale the XFB copy.
const AbstractTexture* source_tex = m_last_xfb_texture; MathUtil::Rectangle<int> copy_rect = src_rect;
MathUtil::Rectangle<int> source_rect = m_last_xfb_region; if (source_width != target_width || source_height != target_height)
if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height)
{ {
if (!CheckFrameDumpRenderTexture(target_width, target_height)) if (!CheckFrameDumpRenderTexture(target_width, target_height))
return; return;
source_tex = m_frame_dump_render_texture.get(); ScaleTexture(m_frame_dump_render_framebuffer.get(), m_frame_dump_render_framebuffer->GetRect(),
source_rect = MathUtil::Rectangle<int>(0, 0, target_width, target_height); src_texture, src_rect);
ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture, src_texture = m_frame_dump_render_texture.get();
m_last_xfb_region); copy_rect = src_texture->GetRect();
} }
// Index 0 was just sent to AVI dump. Swap with the second texture. // Index 0 was just sent to AVI dump. Swap with the second texture.
@ -1369,12 +1351,9 @@ void Renderer::DumpCurrentFrame()
if (!CheckFrameDumpReadbackTexture(target_width, target_height)) if (!CheckFrameDumpReadbackTexture(target_width, target_height))
return; return;
const auto converted_region = m_frame_dump_readback_textures[0]->CopyFromTexture(src_texture, copy_rect, 0, 0,
ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight()); m_frame_dump_readback_textures[0]->GetRect());
m_frame_dump_readback_textures[0]->CopyFromTexture( m_last_frame_state = AVIDump::FetchState(ticks);
source_tex, converted_region, 0, 0,
MathUtil::Rectangle<int>(0, 0, target_width, target_height));
m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks);
m_last_frame_exported = true; m_last_frame_exported = true;
} }

View File

@ -213,8 +213,7 @@ public:
virtual void WaitForGPUIdle() {} virtual void WaitForGPUIdle() {}
// Finish up the current frame, print some stats // Finish up the current frame, print some stats
void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks);
u64 ticks);
// Draws the specified XFB buffer to the screen, performing any post-processing. // Draws the specified XFB buffer to the screen, performing any post-processing.
// Assumes that the backbuffer has already been bound and cleared. // Assumes that the backbuffer has already been bound and cleared.
@ -350,10 +349,7 @@ private:
bool m_last_frame_exported = false; bool m_last_frame_exported = false;
// Tracking of XFB textures so we don't render duplicate frames. // Tracking of XFB textures so we don't render duplicate frames.
AbstractTexture* m_last_xfb_texture = nullptr;
u64 m_last_xfb_id = std::numeric_limits<u64>::max(); u64 m_last_xfb_id = std::numeric_limits<u64>::max();
u64 m_last_xfb_ticks = 0;
EFBRectangle m_last_xfb_region;
// Note: Only used for auto-ir // Note: Only used for auto-ir
u32 m_last_xfb_width = MAX_XFB_WIDTH; u32 m_last_xfb_width = MAX_XFB_WIDTH;
@ -377,7 +373,8 @@ private:
bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height); bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height);
// Fills the frame dump staging texture with the current XFB texture. // Fills the frame dump staging texture with the current XFB texture.
void DumpCurrentFrame(); void DumpCurrentFrame(const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect, u64 ticks);
// Asynchronously encodes the specified pointer of frame data to the frame dump. // Asynchronously encodes the specified pointer of frame data to the frame dump.
void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state); void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state);

View File

@ -393,18 +393,6 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
dst_y = 0; dst_y = 0;
} }
// If the source rectangle is outside of what we actually have in VRAM, skip the copy.
// The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates
// to the graphics driver, which can cause GPU resets.
if (static_cast<u32>(src_x) >= entry->native_width ||
static_cast<u32>(src_y) >= entry->native_height ||
static_cast<u32>(dst_x) >= entry_to_update->native_width ||
static_cast<u32>(dst_y) >= entry_to_update->native_height)
{
++iter.first;
continue;
}
u32 copy_width = u32 copy_width =
std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x); std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x);
u32 copy_height = u32 copy_height =
@ -429,6 +417,18 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
copy_height = g_renderer->EFBToScaledY(copy_height); copy_height = g_renderer->EFBToScaledY(copy_height);
} }
// If the source rectangle is outside of what we actually have in VRAM, skip the copy.
// The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates
// to the graphics driver, which can cause GPU resets.
if (static_cast<u32>(src_x + copy_width) > entry->GetWidth() ||
static_cast<u32>(src_y + copy_height) > entry->GetHeight() ||
static_cast<u32>(dst_x + copy_width) > entry_to_update->GetWidth() ||
static_cast<u32>(dst_y + copy_height) > entry_to_update->GetHeight())
{
++iter.first;
continue;
}
MathUtil::Rectangle<int> srcrect, dstrect; MathUtil::Rectangle<int> srcrect, dstrect;
srcrect.left = src_x; srcrect.left = src_x;
srcrect.top = src_y; srcrect.top = src_y;
@ -471,6 +471,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
} }
++iter.first; ++iter.first;
} }
return entry_to_update; return entry_to_update;
} }
@ -952,7 +953,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
entry->native_height == nativeH) entry->native_height == nativeH)
{ {
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt); entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
entry->texture->FinishedRendering();
return entry; return entry;
} }
} }
@ -1003,7 +1004,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
entry->native_width == nativeW && entry->native_height == nativeH) entry->native_width == nativeW && entry->native_height == nativeH)
{ {
entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt); entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt);
entry->texture->FinishedRendering();
return entry; return entry;
} }
++hash_iter; ++hash_iter;
@ -1209,142 +1210,115 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
return entry; return entry;
} }
TextureCacheBase::TCacheEntry* static void GetDisplayRectForXFBEntry(TextureCacheBase::TCacheEntry* entry, u32 width, u32 height,
TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat tex_format, MathUtil::Rectangle<int>* display_rect)
int texture_cache_safety_color_sample_size)
{ {
auto tex_info = ComputeTextureInformation(address, width, height, tex_format, // Scale the sub-rectangle to the full resolution of the texture.
texture_cache_safety_color_sample_size, false, 0, 0, 0, display_rect->left = 0;
TLUTFormat::IA8, 1); display_rect->top = 0;
if (!tex_info) display_rect->right = static_cast<int>(width * entry->GetWidth() / entry->native_width);
return nullptr; display_rect->bottom = static_cast<int>(height * entry->GetHeight() / entry->native_height);
}
// Try a direct lookup by address/hash. TextureCacheBase::TCacheEntry*
const TextureLookupInformation tex_info_value = tex_info.value(); TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
TCacheEntry* entry = GetXFBFromCache(tex_info_value); MathUtil::Rectangle<int>* display_rect)
if (entry) {
return entry; const u8* src_data = Memory::GetPointer(address);
if (!src_data)
// At this point, the XFB wasn't found in cache. This means the address is most likely not
// pointing at an xfb copy but instead an area of memory. Let's attempt to stitch all entries in
// this memory space together
bool loaded_from_overlapping = true;
entry = GetTextureFromOverlappingTextures(tex_info_value);
if (!entry)
{ {
// At this point, the xfb address is truly "bogus" it likely is an area of memory defined by the ERROR_LOG(VIDEO, "Trying to load XFB texture from invalid address 0x%8x", address);
// CPU, so load it from memory. return nullptr;
entry = GetTextureFromMemory(tex_info_value);
loaded_from_overlapping = false;
} }
// Compute total texture size. XFB textures aren't tiled, so this is simple.
const u32 total_size = height * stride;
const u64 hash = Common::GetHash64(src_data, total_size, 0);
// Do we currently have a version of this XFB copy in VRAM?
TCacheEntry* entry = GetXFBFromCache(address, width, height, stride, hash);
if (entry)
{
if (entry->is_xfb_container)
{
StitchXFBCopy(entry);
entry->texture->FinishedRendering();
}
GetDisplayRectForXFBEntry(entry, width, height, display_rect);
return entry;
}
// Create a new VRAM texture, and fill it with the data from guest RAM.
entry = AllocateCacheEntry(TextureConfig(width, height, 1, 1, 1, AbstractTextureFormat::RGBA8,
AbstractTextureFlag_RenderTarget));
entry->SetGeneralParameters(address, total_size,
TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true);
entry->SetDimensions(width, height, 1);
entry->SetHashes(hash, hash);
entry->SetXfbCopy(stride);
entry->is_xfb_container = true;
entry->is_custom_tex = false;
entry->may_have_overlapping_textures = false;
entry->frameCount = FRAMECOUNT_INVALID;
if (!g_ActiveConfig.UseGPUTextureDecoding() ||
!DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height,
width, height, stride, texMem, entry->format.tlutfmt))
{
const u32 decoded_size = width * height * sizeof(u32);
CheckTempSize(decoded_size);
TexDecoder_DecodeXFB(temp, src_data, width, height, stride);
entry->texture->Load(0, width, height, width, temp, decoded_size);
}
// Stitch any VRAM copies into the new RAM copy.
StitchXFBCopy(entry);
entry->texture->FinishedRendering();
// Insert into the texture cache so we can re-use it next frame, if needed.
textures_by_address.emplace(entry->addr, entry);
SETSTAT(stats.numTexturesAlive, textures_by_address.size());
INCSTAT(stats.numTexturesUploaded);
if (g_ActiveConfig.bDumpXFBTarget) if (g_ActiveConfig.bDumpXFBTarget)
{ {
// While this isn't really an xfb copy, we can treat it as such // While this isn't really an xfb copy, we can treat it as such
// for dumping purposes // for dumping purposes
static int xfb_count = 0; static int xfb_count = 0;
const std::string xfb_type = loaded_from_overlapping ? "combined" : "from_memory"; entry->texture->Save(StringFromFormat("loaded_xfb_%i.png",
entry->texture->Save(StringFromFormat("%sxfb_%s_%i.png",
File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(), File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
xfb_type.c_str(), xfb_count++), xfb_count++),
0); 0);
} }
GetDisplayRectForXFBEntry(entry, width, height, display_rect);
return entry; return entry;
} }
std::optional<TextureLookupInformation> TextureCacheBase::ComputeTextureInformation( TextureCacheBase::TCacheEntry* TextureCacheBase::GetXFBFromCache(u32 address, u32 width, u32 height,
u32 address, u32 width, u32 height, TextureFormat tex_format, u32 stride, u64 hash)
int texture_cache_safety_color_sample_size, bool from_tmem, u32 tmem_address_even,
u32 tmem_address_odd, u32 tlut_address, TLUTFormat tlut_format, u32 levels)
{ {
TextureLookupInformation tex_info; auto iter_range = textures_by_address.equal_range(address);
tex_info.from_tmem = from_tmem;
tex_info.tmem_address_even = tmem_address_even;
tex_info.tmem_address_odd = tmem_address_odd;
tex_info.address = address;
if (from_tmem)
tex_info.src_data = &texMem[tex_info.tmem_address_even];
else
tex_info.src_data = Memory::GetPointer(tex_info.address);
if (tex_info.src_data == nullptr)
{
ERROR_LOG(VIDEO, "Trying to use an invalid texture address 0x%8x", tex_info.address);
return {};
}
tex_info.texture_cache_safety_color_sample_size = texture_cache_safety_color_sample_size;
// TexelSizeInNibbles(format) * width * height / 16;
tex_info.block_width = TexDecoder_GetBlockWidthInTexels(tex_format);
tex_info.block_height = TexDecoder_GetBlockHeightInTexels(tex_format);
tex_info.bytes_per_block = (tex_info.block_width * tex_info.block_height *
TexDecoder_GetTexelSizeInNibbles(tex_format)) /
2;
tex_info.expanded_width = Common::AlignUp(width, tex_info.block_width);
tex_info.expanded_height = Common::AlignUp(height, tex_info.block_height);
tex_info.total_bytes = TexDecoder_GetTextureSizeInBytes(tex_info.expanded_width,
tex_info.expanded_height, tex_format);
tex_info.native_width = width;
tex_info.native_height = height;
tex_info.native_levels = levels;
// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
// the mipmap chain
// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we
// limit the mipmap count to 6 there
tex_info.computed_levels = std::min<u32>(
IntLog2(std::max(tex_info.native_width, tex_info.native_height)) + 1, tex_info.native_levels);
tex_info.full_format = TextureAndTLUTFormat(tex_format, tlut_format);
tex_info.tlut_address = tlut_address;
// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data
// from the low tmem bank than it should)
tex_info.base_hash = Common::GetHash64(tex_info.src_data, tex_info.total_bytes,
tex_info.texture_cache_safety_color_sample_size);
tex_info.is_palette_texture = IsColorIndexed(tex_format);
if (tex_info.is_palette_texture)
{
tex_info.palette_size = TexDecoder_GetPaletteSize(tex_format);
tex_info.full_hash = tex_info.base_hash ^
Common::GetHash64(&texMem[tex_info.tlut_address], tex_info.palette_size,
tex_info.texture_cache_safety_color_sample_size);
}
else
{
tex_info.full_hash = tex_info.base_hash;
}
return tex_info;
}
TextureCacheBase::TCacheEntry*
TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info)
{
auto iter_range = textures_by_address.equal_range(tex_info.address);
TexAddrCache::iterator iter = iter_range.first; TexAddrCache::iterator iter = iter_range.first;
while (iter != iter_range.second) while (iter != iter_range.second)
{ {
TCacheEntry* entry = iter->second; TCacheEntry* entry = iter->second;
if ((entry->is_xfb_copy || entry->format.texfmt == TextureFormat::XFB) && // The only thing which has to match exactly is the stride. We can use a partial rectangle if
entry->native_width == tex_info.native_width && // the VI width/height differs from that of the XFB copy.
entry->native_height == tex_info.native_height && if (entry->is_xfb_copy && entry->memory_stride == stride && entry->native_width >= width &&
entry->memory_stride == entry->BytesPerRow() && !entry->may_have_overlapping_textures) entry->native_height >= height && !entry->may_have_overlapping_textures)
{ {
if (tex_info.base_hash == entry->hash && !entry->reference_changed) // But if the dimensions do differ, we must compute the hash on the sub-rectangle.
u64 check_hash = hash;
if (entry->native_width != width || entry->native_height != height)
{
check_hash = Common::GetHash64(Memory::GetPointer(entry->addr),
entry->memory_stride * entry->native_height, 0);
}
if (entry->hash == check_hash && !entry->reference_changed)
{ {
return entry; return entry;
} }
@ -1364,37 +1338,36 @@ TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info)
return nullptr; return nullptr;
} }
TextureCacheBase::TCacheEntry* void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info)
{ {
u32 numBlocksX = tex_info.native_width / tex_info.block_width; // It is possible that some of the overlapping textures overlap each other. This behavior has been
// seen with XFB copies in Rogue Leader. To get the correct result, we apply the texture updates
// XFBs created for the purpose of being a container for textures from memory // in the order the textures were originally loaded. This ensures that the parts of the texture
// or as a container for overlapping textures, never need to be combined // that would have been overwritten in memory on real hardware get overwritten the same way here
// with other textures // too. This should work, but it may be a better idea to keep track of partial XFB copy
TCacheEntry* stitched_entry = // invalidations instead, which would reduce the amount of copying work here.
CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers());
stitched_entry->may_have_overlapping_textures = false;
// It is possible that some of the overlapping textures overlap each other.
// This behavior has been seen with XFB copies in Rogue Leader.
// To get the correct result, we apply the texture updates in the order the textures were
// originally loaded. This ensures that the parts of the texture that would have been overwritten
// in memory on real hardware get overwritten the same way here too.
// This should work, but it may be a better idea to keep track of partial XFB copy invalidations
// instead, which would reduce the amount of copying work here.
std::vector<TCacheEntry*> candidates; std::vector<TCacheEntry*> candidates;
bool create_upscaled_copy = false;
auto iter = FindOverlappingTextures(tex_info.address, tex_info.total_bytes); auto iter = FindOverlappingTextures(stitched_entry->addr, stitched_entry->size_in_bytes);
while (iter.first != iter.second) while (iter.first != iter.second)
{ {
// Currently, this checks the stride of the VRAM copy against the VI request. Therefore, for
// interlaced modes, VRAM copies won't be considered candidates. This is okay for now, because
// our force progressive hack means that an XFB copy should always have a matching stride. If
// the hack is disabled, XFB2RAM should also be enabled. Should we wish to implement interlaced
// stitching in the future, this would require a shader which grabs every second line.
TCacheEntry* entry = iter.first->second; TCacheEntry* entry = iter.first->second;
if (entry->IsCopy() && !entry->tmem_only && if (entry != stitched_entry && entry->IsCopy() && !entry->tmem_only &&
entry->OverlapsMemoryRange(tex_info.address, tex_info.total_bytes) && entry->OverlapsMemoryRange(stitched_entry->addr, stitched_entry->size_in_bytes) &&
entry->memory_stride == stitched_entry->memory_stride) entry->memory_stride == stitched_entry->memory_stride)
{ {
if (entry->hash == entry->CalculateHash()) if (entry->hash == entry->CalculateHash())
{ {
// Can't check the height here because of Y scaling.
if (entry->native_width != entry->GetWidth())
create_upscaled_copy = true;
candidates.emplace_back(entry); candidates.emplace_back(entry);
} }
else else
@ -1407,219 +1380,108 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
++iter.first; ++iter.first;
} }
if (candidates.empty())
return;
std::sort(candidates.begin(), candidates.end(), std::sort(candidates.begin(), candidates.end(),
[](const TCacheEntry* a, const TCacheEntry* b) { return a->id < b->id; }); [](const TCacheEntry* a, const TCacheEntry* b) { return a->id < b->id; });
bool updated_entry = false; // We only upscale when necessary to preserve resolution. i.e. when there are upscaled partial
// copies to be stitched together.
if (create_upscaled_copy)
{
ScaleTextureCacheEntryTo(stitched_entry, g_renderer->EFBToScaledX(stitched_entry->native_width),
g_renderer->EFBToScaledY(stitched_entry->native_height));
}
for (TCacheEntry* entry : candidates) for (TCacheEntry* entry : candidates)
{ {
if (tex_info.is_palette_texture) int src_x, src_y, dst_x, dst_y;
{
TCacheEntry* decoded_entry =
ApplyPaletteToEntry(entry, nullptr, tex_info.full_format.tlutfmt);
if (decoded_entry)
{
// Link the efb copy with the partially updated texture, so we won't apply this partial
// update again
entry->CreateReference(stitched_entry);
// Mark the texture update as used, as if it was loaded directly
entry->frameCount = FRAMECOUNT_INVALID;
entry = decoded_entry;
}
else
{
continue;
}
}
s32 src_x, src_y, dst_x, dst_y;
// Note for understanding the math:
// Normal textures can't be strided, so the 2 missing cases with src_x > 0 don't exist
if (entry->addr >= stitched_entry->addr) if (entry->addr >= stitched_entry->addr)
{ {
s32 block_offset = (entry->addr - stitched_entry->addr) / tex_info.bytes_per_block; int pixel_offset = (entry->addr - stitched_entry->addr) / 2;
s32 block_x = block_offset % numBlocksX;
s32 block_y = block_offset / numBlocksX;
src_x = 0; src_x = 0;
src_y = 0; src_y = 0;
dst_x = block_x * tex_info.block_width; dst_x = pixel_offset % stitched_entry->native_width;
dst_y = block_y * tex_info.block_height; dst_y = pixel_offset / stitched_entry->native_width;
} }
else else
{ {
s32 srcNumBlocksX = entry->native_width / tex_info.block_width; int pixel_offset = (stitched_entry->addr - entry->addr) / 2;
s32 block_offset = (stitched_entry->addr - entry->addr) / tex_info.bytes_per_block; src_x = pixel_offset % entry->native_width;
s32 block_x = block_offset % srcNumBlocksX; src_y = pixel_offset / entry->native_width;
s32 block_y = block_offset / srcNumBlocksX;
src_x = block_x * tex_info.block_width;
src_y = block_y * tex_info.block_height;
dst_x = 0; dst_x = 0;
dst_y = 0; dst_y = 0;
} }
const int native_width =
std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x);
const int native_height =
std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y);
int src_width = native_width;
int src_height = native_height;
int dst_width = native_width;
int dst_height = native_height;
// Scale to internal resolution.
if (entry->native_width != entry->GetWidth())
{
src_x = g_renderer->EFBToScaledX(src_x);
src_y = g_renderer->EFBToScaledY(src_y);
src_width = g_renderer->EFBToScaledX(src_width);
src_height = g_renderer->EFBToScaledY(src_height);
}
if (create_upscaled_copy)
{
dst_x = g_renderer->EFBToScaledX(dst_x);
dst_y = g_renderer->EFBToScaledY(dst_y);
dst_width = g_renderer->EFBToScaledX(dst_width);
dst_height = g_renderer->EFBToScaledY(dst_height);
}
// If the source rectangle is outside of what we actually have in VRAM, skip the copy. // If the source rectangle is outside of what we actually have in VRAM, skip the copy.
// The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates
// to the graphics driver, which can cause GPU resets. // to the graphics driver, which can cause GPU resets.
if (static_cast<u32>(src_x) >= entry->native_width || if (static_cast<u32>(src_x + src_width) > entry->GetWidth() ||
static_cast<u32>(src_y) >= entry->native_height || static_cast<u32>(src_y + src_height) > entry->GetHeight() ||
static_cast<u32>(dst_x) >= stitched_entry->native_width || static_cast<u32>(dst_x + dst_width) > stitched_entry->GetWidth() ||
static_cast<u32>(dst_y) >= stitched_entry->native_height) static_cast<u32>(dst_y + dst_height) > stitched_entry->GetHeight())
{ {
continue; continue;
} }
u32 copy_width = std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x);
u32 copy_height = std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y);
// If one of the textures is scaled, scale both with the current efb scaling factor
if (stitched_entry->native_width != stitched_entry->GetWidth() ||
stitched_entry->native_height != stitched_entry->GetHeight() ||
entry->native_width != entry->GetWidth() || entry->native_height != entry->GetHeight())
{
ScaleTextureCacheEntryTo(stitched_entry,
g_renderer->EFBToScaledX(stitched_entry->native_width),
g_renderer->EFBToScaledY(stitched_entry->native_height));
ScaleTextureCacheEntryTo(entry, g_renderer->EFBToScaledX(entry->native_width),
g_renderer->EFBToScaledY(entry->native_height));
src_x = g_renderer->EFBToScaledX(src_x);
src_y = g_renderer->EFBToScaledY(src_y);
dst_x = g_renderer->EFBToScaledX(dst_x);
dst_y = g_renderer->EFBToScaledY(dst_y);
copy_width = g_renderer->EFBToScaledX(copy_width);
copy_height = g_renderer->EFBToScaledY(copy_height);
}
MathUtil::Rectangle<int> srcrect, dstrect; MathUtil::Rectangle<int> srcrect, dstrect;
srcrect.left = src_x; srcrect.left = src_x;
srcrect.top = src_y; srcrect.top = src_y;
srcrect.right = (src_x + copy_width); srcrect.right = (src_x + src_width);
srcrect.bottom = (src_y + copy_height); srcrect.bottom = (src_y + src_height);
dstrect.left = dst_x; dstrect.left = dst_x;
dstrect.top = dst_y; dstrect.top = dst_y;
dstrect.right = (dst_x + copy_width); dstrect.right = (dst_x + dst_width);
dstrect.bottom = (dst_y + copy_height); dstrect.bottom = (dst_y + dst_height);
// If one copy is stereo, and the other isn't... not much we can do here :/ // We may have to scale if one of the copies is not internal resolution.
const u32 layers_to_copy = std::min(entry->GetNumLayers(), stitched_entry->GetNumLayers()); if (srcrect.GetWidth() != dstrect.GetWidth() || srcrect.GetHeight() != dstrect.GetHeight())
for (u32 layer = 0; layer < layers_to_copy; layer++)
{ {
stitched_entry->texture->CopyRectangleFromTexture(entry->texture.get(), srcrect, layer, 0, g_renderer->ScaleTexture(stitched_entry->framebuffer.get(), dstrect, entry->texture.get(),
dstrect, layer, 0); srcrect);
}
updated_entry = true;
if (tex_info.is_palette_texture)
{
// Remove the temporary converted texture, it won't be used anywhere else
// TODO: It would be nice to convert and copy in one step, but this code path isn't common
InvalidateTexture(GetTexCacheIter(entry));
} }
else else
{ {
// Link the two textures together, so we won't apply this partial update again // If one copy is stereo, and the other isn't... not much we can do here :/
entry->CreateReference(stitched_entry); const u32 layers_to_copy = std::min(entry->GetNumLayers(), stitched_entry->GetNumLayers());
// Mark the texture update as used, as if it was loaded directly for (u32 layer = 0; layer < layers_to_copy; layer++)
entry->frameCount = FRAMECOUNT_INVALID; {
} stitched_entry->texture->CopyRectangleFromTexture(entry->texture.get(), srcrect, layer, 0,
} dstrect, layer, 0);
}
if (!updated_entry)
{
// Kinda annoying that we have to throw away the texture we just created, but with the above
// code requiring the TCacheEntry object exists, can't do much at the moment.
InvalidateTexture(GetTexCacheIter(stitched_entry));
return nullptr;
}
stitched_entry->texture->FinishedRendering();
return stitched_entry;
}
// Allocates a cache entry backed by an RGBA8 render-target texture sized from tex_info, registers
// it in textures_by_address (and, conditionally, in textures_by_hash), copies the lookup
// parameters onto the entry, and marks it as not being an EFB/XFB copy.
//
// tex_info: description of the texture being created.
// layers:   layer count handed to the texture configuration.
//
// Returns the new entry, or nullptr when AllocateCacheEntry() could not provide one.
TextureCacheBase::TCacheEntry*
TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers)
{
  const TextureConfig texture_config(tex_info.native_width, tex_info.native_height,
                                     tex_info.computed_levels, layers, 1,
                                     AbstractTextureFormat::RGBA8,
                                     AbstractTextureFlag_RenderTarget);

  TCacheEntry* const new_entry = AllocateCacheEntry(texture_config);
  if (new_entry == nullptr)
    return nullptr;

  textures_by_address.emplace(tex_info.address, new_entry);

  // The entry is only indexed by hash when safe hashing is selected (sample size of 0) or when
  // the larger of the texture/palette sizes does not exceed eight times the sample size.
  // NOTE(review): presumably this is the case where the hash covers all of the data - confirm.
  const int sample_size = tex_info.texture_cache_safety_color_sample_size;
  const u32 largest_hashed_size = std::max(tex_info.total_bytes, tex_info.palette_size);
  const bool index_by_hash =
      sample_size == 0 || largest_hashed_size <= static_cast<u32>(sample_size) * 8;
  if (index_by_hash)
    new_entry->textures_by_hash_iter = textures_by_hash.emplace(tex_info.full_hash, new_entry);

  new_entry->SetGeneralParameters(tex_info.address, tex_info.total_bytes, tex_info.full_format,
                                  false);
  new_entry->SetDimensions(tex_info.native_width, tex_info.native_height,
                           tex_info.computed_levels);
  new_entry->SetHashes(tex_info.base_hash, tex_info.full_hash);
  new_entry->is_custom_tex = false;
  // Set after the format and dimensions above, so BytesPerRow() is queried on a populated entry.
  new_entry->memory_stride = new_entry->BytesPerRow();
  new_entry->SetNotCopy();

  INCSTAT(stats.numTexturesUploaded);
  SETSTAT(stats.numTexturesAlive, textures_by_address.size());
  return new_entry;
}
// Creates a single-layer cache entry for tex_info and fills level zero from the source data
// referenced by tex_info (see LoadTextureLevelZeroFromMemory).
//
// Returns the populated entry, or nullptr when CreateNormalTexture() could not allocate one.
TextureCacheBase::TCacheEntry*
TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info)
{
  // We can decode on the GPU if it is a supported format and the flag is enabled.
  // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
  // banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
  // there's no conversion between formats. In the future this could be extended with a separate
  // shader, however.
  const bool decode_on_gpu =
      g_ActiveConfig.UseGPUTextureDecoding() &&
      !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);

  // Since it's coming from RAM, it can only have one layer (no stereo).
  TCacheEntry* entry = CreateNormalTexture(tex_info, 1);

  // CreateNormalTexture() returns nullptr when the entry allocation fails; bail out instead of
  // dereferencing a null pointer below.
  if (!entry)
    return nullptr;

  entry->may_have_overlapping_textures = false;
  LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu);
  entry->texture->FinishedRendering();
  return entry;
}
// Fills level 0 of entry_to_update from the source data described by tex_info.
// When decode_on_gpu is set, DecodeTextureOnGPU() is tried first; if GPU decoding is not
// requested, or that call returns false, the texture is decoded on the CPU into `temp` and then
// uploaded through texture->Load().
void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update,
const TextureLookupInformation& tex_info,
bool decode_on_gpu)
{
const u8* tlut = &texMem[tex_info.tlut_address];
// The row stride handed to the GPU decoder is bytes_per_block times the number of blocks per
// expanded row.
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes,
tex_info.full_format.texfmt, tex_info.native_width,
tex_info.native_height, tex_info.expanded_width, tex_info.expanded_height,
tex_info.bytes_per_block *
(tex_info.expanded_width / tex_info.block_width),
tlut, tex_info.full_format.tlutfmt))
{
// CPU path: the decoded image holds one u32 per texel of the expanded dimensions.
size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height;
// NOTE(review): presumably grows `temp` to at least this size - confirm in CheckTempSize().
CheckTempSize(decoded_texture_size);
// RGBA8 sourced from TMEM is decoded from two separate source pointers; everything else goes
// through the generic decoder.
if (!(tex_info.full_format.texfmt == TextureFormat::RGBA8 && tex_info.from_tmem))
{
TexDecoder_Decode(temp, tex_info.src_data, tex_info.expanded_width, tex_info.expanded_height,
tex_info.full_format.texfmt, tlut, tex_info.full_format.tlutfmt);
}
else
{
u8* src_data_gb = &texMem[tex_info.tmem_address_odd];
TexDecoder_DecodeRGBA8FromTmem(temp, tex_info.src_data, src_data_gb, tex_info.expanded_width,
tex_info.expanded_height);
} }
entry_to_update->texture->Load(0, tex_info.native_width, tex_info.native_height, // Link the two textures together, so we won't apply this partial update again
tex_info.expanded_width, temp, decoded_texture_size); entry->CreateReference(stitched_entry);
// Mark the texture update as used, as if it was loaded directly
entry->frameCount = FRAMECOUNT_INVALID;
} }
} }
@ -1746,8 +1608,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(
// For the latter, we keep the EFB resolution for the virtual XFB blit. // For the latter, we keep the EFB resolution for the virtual XFB blit.
u32 tex_w = width; u32 tex_w = width;
u32 tex_h = height; u32 tex_h = height;
u32 scaled_tex_w = g_renderer->EFBToScaledX(srcRect.GetWidth()); u32 scaled_tex_w = g_renderer->EFBToScaledX(width);
u32 scaled_tex_h = g_renderer->EFBToScaledY(srcRect.GetHeight()); u32 scaled_tex_h = g_renderer->EFBToScaledY(height);
if (scaleByHalf) if (scaleByHalf)
{ {
@ -1798,6 +1660,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
copy_to_vram = false; copy_to_vram = false;
} }
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x.
// TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
// complex down filtering to average all pixels and produce the correct result.
const bool linear_filter =
!is_depth_copy && (scaleByHalf || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f);
TCacheEntry* entry = nullptr; TCacheEntry* entry = nullptr;
if (copy_to_vram) if (copy_to_vram)
{ {
@ -1822,8 +1690,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(
entry->may_have_overlapping_textures = false; entry->may_have_overlapping_textures = false;
entry->is_custom_tex = false; entry->is_custom_tex = false;
CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity, gamma, CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, linear_filter, dstFormat,
clamp_top, clamp_bottom, isIntensity, gamma, clamp_top, clamp_bottom,
GetVRAMCopyFilterCoefficients(filter_coefficients)); GetVRAMCopyFilterCoefficients(filter_coefficients));
if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy) if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy)
@ -1857,7 +1725,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (staging_texture) if (staging_texture)
{ {
CopyEFB(staging_texture.get(), format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, CopyEFB(staging_texture.get(), format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect,
scaleByHalf, y_scale, gamma, clamp_top, clamp_bottom, coefficients); scaleByHalf, linear_filter, y_scale, gamma, clamp_top, clamp_bottom, coefficients);
// We can't defer if there is no VRAM copy (since we need to update the hash). // We can't defer if there is no VRAM copy (since we need to update the hash).
if (!copy_to_vram || !g_ActiveConfig.bDeferEFBCopies) if (!copy_to_vram || !g_ActiveConfig.bDeferEFBCopies)
@ -1930,7 +1798,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(
iter.first = InvalidateTexture(iter.first, true); iter.first = InvalidateTexture(iter.first, true);
continue; continue;
} }
overlapping_entry->may_have_overlapping_textures = true;
// We don't want to change the may_have_overlapping_textures flag on XFB container entries
// because otherwise they can't be re-used/updated, leaking textures for several frames.
if (!overlapping_entry->is_xfb_container)
overlapping_entry->may_have_overlapping_textures = true;
// There are cases (Rogue Squadron 2 / Texas Holdem on Wiiware) where // There are cases (Rogue Squadron 2 / Texas Holdem on Wiiware) where
// for xfb copies the textures overlap which causes the hash of the first copy // for xfb copies the textures overlap which causes the hash of the first copy
@ -2287,8 +2159,9 @@ bool TextureCacheBase::CreateUtilityTextures()
void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half, const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma, bool linear_filter, EFBCopyFormat dst_format,
bool clamp_top, bool clamp_bottom, bool is_intensity, float gamma, bool clamp_top,
bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) const EFBCopyFilterCoefficients& filter_coefficients)
{ {
// Flush EFB pokes first, as they're expected to be included. // Flush EFB pokes first, as they're expected to be included.
@ -2347,7 +2220,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect()); g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect());
g_renderer->SetPipeline(copy_pipeline); g_renderer->SetPipeline(copy_pipeline);
g_renderer->SetTexture(0, src_texture); g_renderer->SetTexture(0, src_texture);
g_renderer->SetSamplerState(0, scale_by_half ? RenderState::GetLinearSamplerState() : g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() :
RenderState::GetPointSamplerState()); RenderState::GetPointSamplerState());
g_renderer->Draw(0, 3); g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing(); g_renderer->EndUtilityDrawing();
@ -2357,7 +2230,8 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom, bool linear_filter, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) const EFBCopyFilterCoefficients& filter_coefficients)
{ {
// Flush EFB pokes first, as they're expected to be included. // Flush EFB pokes first, as they're expected to be included.
@ -2406,12 +2280,6 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
encoder_params.filter_coefficients[2] = filter_coefficients.lower; encoder_params.filter_coefficients[2] = filter_coefficients.lower;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
// complex down filtering to average all pixels and produce the correct result.
const bool linear_filter =
(scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
const u32 render_width = bytes_per_row / sizeof(u32); const u32 render_width = bytes_per_row / sizeof(u32);
const u32 render_height = num_blocks_y; const u32 render_height = num_blocks_y;
@ -2566,6 +2434,7 @@ void TextureCacheBase::TCacheEntry::SetXfbCopy(u32 stride)
{ {
is_efb_copy = false; is_efb_copy = false;
is_xfb_copy = true; is_xfb_copy = true;
is_xfb_container = false;
memory_stride = stride; memory_stride = stride;
ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small"); ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small");
@ -2577,6 +2446,7 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)
{ {
is_efb_copy = true; is_efb_copy = true;
is_xfb_copy = false; is_xfb_copy = false;
is_xfb_container = false;
memory_stride = stride; memory_stride = stride;
ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small"); ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small");
@ -2586,8 +2456,9 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)
// Marks this entry as not being any kind of copy by clearing the is_xfb_copy, is_efb_copy and
// is_xfb_container flags.
void TextureCacheBase::TCacheEntry::SetNotCopy() void TextureCacheBase::TCacheEntry::SetNotCopy()
{ {
is_xfb_copy = false;
is_efb_copy = false; is_efb_copy = false;
is_xfb_copy = false;
is_xfb_container = false;
} }
int TextureCacheBase::TCacheEntry::HashSampleSize() const int TextureCacheBase::TCacheEntry::HashSampleSize() const

View File

@ -77,42 +77,6 @@ struct EFBCopyFilterCoefficients
float lower; float lower;
}; };
// Describes a single texture lookup: where the source data lives, its dimensions and format, and
// the hashes used to index the resulting cache entry.
//
// Every scalar and pointer member has a default initializer so that a default-constructed
// instance never carries indeterminate values.
struct TextureLookupInformation
{
  // Address the resulting cache entry is registered under.
  u32 address = 0;

  // Block geometry of the source format; together with expanded_width these give the source row
  // stride (bytes_per_block * (expanded_width / block_width)).
  u32 block_width = 0;
  u32 block_height = 0;
  u32 bytes_per_block = 0;

  // Dimensions of the decoded image buffer.
  // NOTE(review): presumably the native size rounded up to whole blocks - confirm.
  u32 expanded_width = 0;
  u32 expanded_height = 0;

  // Dimensions of the texture that gets created and uploaded.
  u32 native_width = 0;
  u32 native_height = 0;

  // Size of the source data in bytes.
  u32 total_bytes = 0;

  u32 native_levels = 1;
  // Level count used when creating the texture.
  u32 computed_levels = 0;

  // Hashes stored on the cache entry; full_hash is also the hash-index key.
  u64 base_hash = 0;
  u64 full_hash = 0;

  // Texture format plus TLUT format.
  TextureAndTLUTFormat full_format;

  // Offset of the TLUT inside texMem.
  u32 tlut_address = 0;

  bool is_palette_texture = false;
  u32 palette_size = 0;

  bool use_mipmaps = false;

  // TMEM sourcing; tmem_address_odd is the texMem offset of the second source pointer used when
  // decoding RGBA8 from TMEM.
  bool from_tmem = false;
  u32 tmem_address_even = 0;
  u32 tmem_address_odd = 0;

  int texture_cache_safety_color_sample_size = 0;  // Default to safe hashing

  // Pointer to the source texel data handed to the decoders.
  u8* src_data = nullptr;
};
class TextureCacheBase class TextureCacheBase
{ {
private: private:
@ -138,6 +102,7 @@ public:
// content, aren't just downscaled // content, aren't just downscaled
bool should_force_safe_hashing = false; // for XFB bool should_force_safe_hashing = false; // for XFB
bool is_xfb_copy = false; bool is_xfb_copy = false;
bool is_xfb_container = false;
u64 id; u64 id;
bool reference_changed = false; // used by xfb to determine when a reference xfb changed bool reference_changed = false; // used by xfb to determine when a reference xfb changed
@ -243,20 +208,9 @@ public:
TLUTFormat tlutfmt = TLUTFormat::IA8, bool use_mipmaps = false, TLUTFormat tlutfmt = TLUTFormat::IA8, bool use_mipmaps = false,
u32 tex_levels = 1, bool from_tmem = false, u32 tmem_address_even = 0, u32 tex_levels = 1, bool from_tmem = false, u32 tmem_address_even = 0,
u32 tmem_address_odd = 0); u32 tmem_address_odd = 0);
TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect);
TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat texformat,
int textureCacheSafetyColorSampleSize);
std::optional<TextureLookupInformation>
ComputeTextureInformation(u32 address, u32 width, u32 height, TextureFormat texformat,
int textureCacheSafetyColorSampleSize, bool from_tmem,
u32 tmem_address_even, u32 tmem_address_odd, u32 tlutaddr,
TLUTFormat tlutfmt, u32 levels);
TCacheEntry* GetXFBFromCache(const TextureLookupInformation& tex_info);
TCacheEntry* GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info);
TCacheEntry* GetTextureFromMemory(const TextureLookupInformation& tex_info);
TCacheEntry* CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers);
void LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update,
const TextureLookupInformation& tex_info, bool decode_on_gpu);
virtual void BindTextures(); virtual void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,
u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect,
@ -289,13 +243,13 @@ protected:
virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, const EFBRectangle& src_rect, bool scale_by_half, bool linear_filter,
bool clamp_top, bool clamp_bottom, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients); const EFBCopyFilterCoefficients& filter_coefficients);
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half, const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
bool clamp_top, bool clamp_bottom, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients); const EFBCopyFilterCoefficients& filter_coefficients);
alignas(16) u8* temp = nullptr; alignas(16) u8* temp = nullptr;
@ -322,10 +276,13 @@ private:
void SetBackupConfig(const VideoConfig& config); void SetBackupConfig(const VideoConfig& config);
TCacheEntry* GetXFBFromCache(u32 address, u32 width, u32 height, u32 stride, u64 hash);
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt); TLUTFormat tlutfmt);
void StitchXFBCopy(TCacheEntry* entry_to_update);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary); void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary);
void CheckTempSize(size_t required_size); void CheckTempSize(size_t required_size);

View File

@ -116,6 +116,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt); TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t, void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t,
int imageWidth); int imageWidth);
void TexDecoder_DecodeXFB(u8* dst, const u8* src, u32 width, u32 height, u32 stride);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);

View File

@ -751,3 +751,41 @@ void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb,
} }
} }
} }
// Converts an XFB image from packed Y1-U-Y2-V samples to 32-bit pixels laid out as R, G, B, A in
// the low-to-high bits of each u32, with alpha forced to 0xff.
//
// dst:    output buffer, written contiguously (no padding between rows).
// src:    first byte of the first source row.
// width:  pixels per row. Pixels are processed in pairs, so an odd width still reads and writes
//         one extra pixel per row.
// height: number of rows.
// stride: distance in bytes between the starts of consecutive source rows.
void TexDecoder_DecodeXFB(u8* dst, const u8* src, u32 width, u32 height, u32 stride)
{
  // Inverse BT.601 conversion from YCbCr to RGB, packed into a single pixel.
  // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
  const auto to_rgba = [](int luma, int cb, int cr) -> u32 {
    const u8 red = static_cast<u8>(MathUtil::Clamp(int(1.164f * luma + 1.596f * cr), 0, 255));
    const u8 green =
        static_cast<u8>(MathUtil::Clamp(int(1.164f * luma - 0.392f * cb - 0.813f * cr), 0, 255));
    const u8 blue = static_cast<u8>(MathUtil::Clamp(int(1.164f * luma + 2.017f * cb), 0, 255));
    return 0xff000000 | blue << 16 | green << 8 | red;
  };

  u8* out = dst;
  const u8* row_start = src;
  for (u32 row = 0; row < height; ++row, row_start += stride)
  {
    // Each 4-byte colour sample carries two luma values sharing one chroma pair.
    const u8* sample = row_start;
    for (u32 col = 0; col < width; col += 2, sample += 4)
    {
      const int luma_first = int(sample[0]) - 16;
      const int cb = int(sample[1]) - 128;
      const int luma_second = int(sample[2]) - 16;
      const int cr = int(sample[3]) - 128;

      // memcpy keeps the stores valid even if dst is not u32-aligned.
      const u32 pixel_pair[2] = {to_rgba(luma_first, cb, cr), to_rgba(luma_second, cb, cr)};
      std::memcpy(out, pixel_pair, sizeof(pixel_pair));
      out += sizeof(pixel_pair);
    }
  }
}

View File

@ -346,5 +346,8 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, Text
} }
break; break;
} }
case TextureFormat::XFB:
TexDecoder_DecodeXFB(reinterpret_cast<u8*>(dst), src, width, height, width * 2);
break;
} }
} }

View File

@ -1488,37 +1488,8 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, Text
break; break;
case TextureFormat::XFB: case TextureFormat::XFB:
{ TexDecoder_DecodeXFB(reinterpret_cast<u8*>(dst), src, width, height, width * 2);
for (int y = 0; y < height; y += 1) break;
{
for (int x = 0; x < width; x += 2)
{
size_t offset = static_cast<size_t>((y * width + x) * 2);
// We do this one color sample (aka 2 RGB pixels) at a time
int Y1 = int(src[offset]) - 16;
int U = int(src[offset + 1]) - 128;
int Y2 = int(src[offset + 2]) - 16;
int V = int(src[offset + 3]) - 128;
// We do the inverse BT.601 conversion for YCbCr to RGB
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
u8 R1 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 + 1.596f * V), 0, 255));
u8 G1 =
static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 - 0.392f * U - 0.813f * V), 0, 255));
u8 B1 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 + 2.017f * U), 0, 255));
u8 R2 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 + 1.596f * V), 0, 255));
u8 G2 =
static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 - 0.392f * U - 0.813f * V), 0, 255));
u8 B2 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 + 2.017f * U), 0, 255));
dst[y * width + x] = 0xff000000 | B1 << 16 | G1 << 8 | R1;
dst[y * width + x + 1] = 0xff000000 | B2 << 16 | G2 << 8 | R2;
}
}
}
break;
default: default:
PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)", PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",