rsx: Fixes to the texture cache

rsx: Blit engine improvements
- Always handle blits to and from framebuffers through the GPU
- Handle depth surfaces properly when using GL
- Check for format mismatches when blitting to the surface store [WIP]
This commit is contained in:
kd-11 2017-09-04 13:05:02 +03:00
parent 73312fc363
commit 2d0f1f27a8
7 changed files with 189 additions and 96 deletions

View File

@ -469,6 +469,44 @@ namespace rsx
Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), nullptr, true); Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), nullptr, true);
} }
/**
 * Moves a single surface from surface storage to the invalidated surface store.
 * Can be triggered by the texture cache's blit functionality when formats do not match.
 *
 * surface - handle to look for; compared against Traits::get() of every stored entry
 * depth   - true to search the depth stencil storage, false to search the render target storage
 *
 * Only the first matching entry is moved. The call has no effect if no entry matches.
 */
void invalidate_single_surface(surface_type surface, bool depth)
{
	if (!depth)
	{
		for (auto It = m_render_targets_storage.begin(); It != m_render_targets_storage.end(); ++It)
		{
			if (surface == Traits::get(It->second))
			{
				//The stored entry is moved into invalidated_resources before its map slot is erased
				invalidated_resources.push_back(std::move(It->second));
				m_render_targets_storage.erase(It);

				//The iterator is no longer usable after erase, leave immediately
				return;
			}
		}
	}
	else
	{
		for (auto It = m_depth_stencil_storage.begin(); It != m_depth_stencil_storage.end(); ++It)
		{
			if (surface == Traits::get(It->second))
			{
				//The stored entry is moved into invalidated_resources before its map slot is erased
				invalidated_resources.push_back(std::move(It->second));
				m_depth_stencil_storage.erase(It);

				//The iterator is no longer usable after erase, leave immediately
				return;
			}
		}
	}
}
/** /**
* Clipping and fitting lookup functions * Clipping and fitting lookup functions
* surface_overlaps - returns true if surface overlaps a given surface address and returns the relative x and y position of the surface address within the surface * surface_overlaps - returns true if surface overlaps a given surface address and returns the relative x and y position of the surface address within the surface

View File

@ -894,8 +894,13 @@ bool GLGSRender::check_program_state()
if (!is_depth) if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else else
{
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface && m_gl_texture_cache.is_depth_texture(texaddr))
return std::make_tuple(true, 0);
}
if (!surface) if (!surface)
{ {
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
@ -922,18 +927,6 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
auto &fragment_program = current_fragment_program; auto &fragment_program = current_fragment_program;
auto &vertex_program = current_vertex_program; auto &vertex_program = current_vertex_program;
for (auto &vtx : vertex_program.rsx_vertex_inputs)
{
auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location];
if (array_info.type() == rsx::vertex_base_type::s1 ||
array_info.type() == rsx::vertex_base_type::cmp)
{
//Some vendors do not support GL_x_SNORM buffer textures
verify(HERE), vtx.flags == 0;
vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT;
}
}
vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
void* pipeline_properties = nullptr; void* pipeline_properties = nullptr;

View File

@ -1916,7 +1916,7 @@ namespace gl
}; };
protected: protected:
GLuint m_id; GLuint m_id = GL_NONE;
fbo &m_parent; fbo &m_parent;
public: public:

View File

@ -142,6 +142,7 @@ namespace gl
flushed = false; flushed = false;
copied = false; copied = false;
is_depth = false;
vram_texture = 0; vram_texture = 0;
} }
@ -194,6 +195,11 @@ namespace gl
real_pitch = current_width * get_pixel_size(format, type); real_pitch = current_width * get_pixel_size(format, type);
} }
//Stores the caller-supplied flag in is_depth.
//is_depth_fmt - true when this section should be treated as holding depth data
void set_depth_flag(bool is_depth_fmt)
{
is_depth = is_depth_fmt;
}
void set_source(gl::texture &source) void set_source(gl::texture &source)
{ {
vram_texture = source.id(); vram_texture = source.id();
@ -339,69 +345,38 @@ namespace gl
{ {
return std::make_tuple(current_width, current_height); return std::make_tuple(current_width, current_height);
} }
//Returns the current value of the is_depth flag for this section
bool is_depth_texture() const
{
return is_depth;
}
}; };
class blitter class blitter
{ {
fbo fbo_argb8;
fbo fbo_rgb565;
fbo blit_src; fbo blit_src;
fbo blit_dst;
u32 argb8_surface = 0;
u32 rgb565_surface = 0;
public: public:
void init() void init()
{ {
fbo_argb8.create();
fbo_rgb565.create();
blit_src.create(); blit_src.create();
blit_dst.create();
glGenTextures(1, &argb8_surface);
glBindTexture(GL_TEXTURE_2D, argb8_surface);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 4096, 4096);
glGenTextures(1, &rgb565_surface);
glBindTexture(GL_TEXTURE_2D, rgb565_surface);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGB565, 4096, 4096);
s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
fbo_argb8.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, argb8_surface, 0);
fbo_rgb565.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rgb565_surface, 0);
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
fbo_argb8.check();
fbo_rgb565.check();
} }
void destroy() void destroy()
{ {
fbo_argb8.remove(); blit_dst.remove();
fbo_rgb565.remove();
blit_src.remove(); blit_src.remove();
glDeleteTextures(1, &argb8_surface);
glDeleteTextures(1, &rgb565_surface);
} }
u32 scale_image(u32 src, u32 dst, const areai src_rect, const areai dst_rect, const position2i dst_offset, const position2i clip_offset, u32 scale_image(u32 src, u32 dst, const areai src_rect, const areai dst_rect, const GLenum dst_format, const position2i dst_offset, const position2i /*clip_offset*/,
const size2i dst_dims, const size2i clip_dims, bool is_argb8, bool linear_interpolation) const size2i dst_dims, const size2i clip_dims, bool /*is_argb8*/, bool is_depth_copy, bool linear_interpolation)
{ {
s32 old_fbo = 0; s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src, 0);
blit_src.check();
u32 src_surface = 0;
u32 dst_tex = dst; u32 dst_tex = dst;
filter interp = linear_interpolation ? filter::linear : filter::nearest; filter interp = linear_interpolation ? filter::linear : filter::nearest;
@ -411,30 +386,43 @@ namespace gl
glBindTexture(GL_TEXTURE_2D, dst_tex); glBindTexture(GL_TEXTURE_2D, dst_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexStorage2D(GL_TEXTURE_2D, 1, dst_format, dst_dims.width, dst_dims.height);
}
if (is_argb8) GLenum attachment = is_depth_copy ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, dst_dims.width, dst_dims.height);
else blit_src.bind();
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGB565, dst_dims.width, dst_dims.height); glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src, 0);
blit_src.check();
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_tex, 0);
blit_dst.check();
u32 src_width = src_rect.x2 - src_rect.x1;
u32 src_height = src_rect.y2 - src_rect.y1;
u32 dst_width = dst_rect.x2 - dst_rect.x1;
u32 dst_height = dst_rect.y2 - dst_rect.y1;
if (clip_dims.width != dst_width ||
clip_dims.height != dst_height)
{
//clip reproject
src_width = (src_width * clip_dims.width) / dst_width;
src_height = (src_height * clip_dims.height) / dst_height;
} }
GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST);
if (scissor_test_enabled) if (scissor_test_enabled)
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
if (is_argb8) areai dst_area = dst_rect;
{ dst_area.x1 += dst_offset.x;
blit_src.blit(fbo_argb8, src_rect, dst_rect, buffers::color, interp); dst_area.x2 += dst_offset.x;
src_surface = argb8_surface; dst_area.y1 += dst_offset.y;
} dst_area.y2 += dst_offset.y;
else
{
blit_src.blit(fbo_rgb565, src_rect, dst_rect, buffers::color, interp);
src_surface = rgb565_surface;
}
glCopyImageSubData(src_surface, GL_TEXTURE_2D, 0, clip_offset.x, clip_offset.y, 0, blit_src.blit(blit_dst, src_rect, dst_area, is_depth_copy ? buffers::depth : buffers::color, interp);
dst_tex, GL_TEXTURE_2D, 0, dst_offset.x, dst_offset.y, 0, clip_dims.width, clip_dims.height, 1);
if (scissor_test_enabled) if (scissor_test_enabled)
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
@ -1057,11 +1045,19 @@ namespace gl
} }
} }
//Reports whether the cached section found at rsx_address is flagged as a depth texture.
//Returns false when find_texture_from_range yields no section for the probed range.
bool is_depth_texture(const u32 rsx_address)
{
	//NOTE(review): the 64u probe length looks like a minimal lookup window - confirm
	const auto cached = find_texture_from_range(rsx_address, 64u);
	return (cached != nullptr) && cached->is_depth_texture();
}
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts) bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts)
{ {
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag //Since we will have dst in vram, we can 'safely' ignore the swizzle flag
//TODO: Verify correct behavior //TODO: Verify correct behavior
bool is_depth_blit = false;
bool src_is_render_target = false; bool src_is_render_target = false;
bool dst_is_render_target = false; bool dst_is_render_target = false;
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8); bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
@ -1078,9 +1074,17 @@ namespace gl
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
//Check if src/dst are parts of render targets //Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, true); auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false);
dst_is_render_target = dst_subres.surface != nullptr; dst_is_render_target = dst_subres.surface != nullptr;
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false);
src_is_render_target = src_subres.surface != nullptr;
//Always use GPU blit if src or dst is in the surface store
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
return false;
u16 max_dst_width = dst.width; u16 max_dst_width = dst.width;
u16 max_dst_height = dst.height; u16 max_dst_height = dst.height;
@ -1115,6 +1119,7 @@ namespace gl
bool is_memcpy = false; bool is_memcpy = false;
u32 memcpy_bytes_length = 0; u32 memcpy_bytes_length = 0;
if (dst_is_argb8 == src_is_argb8 && !dst.swizzled) if (dst_is_argb8 == src_is_argb8 && !dst.swizzled)
{ {
if ((src.slice_h == 1 && dst.clip_height == 1) || if ((src.slice_h == 1 && dst.clip_height == 1) ||
@ -1126,11 +1131,12 @@ namespace gl
} }
} }
cached_texture_section* cached_dest = nullptr;
if (!dst_is_render_target) if (!dst_is_render_target)
{ {
//First check if this surface exists in VRAM with exact dimensions //First check if this surface exists in VRAM with exact dimensions
//Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible //Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible
auto cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height); cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height);
//Check for any available region that will fit this one //Check for any available region that will fit this one
if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height); if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height);
@ -1184,10 +1190,6 @@ namespace gl
} }
} }
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, true);
src_is_render_target = src_subres.surface != nullptr;
//Create source texture if does not exist //Create source texture if does not exist
if (!src_is_render_target) if (!src_is_render_target)
{ {
@ -1255,6 +1257,38 @@ namespace gl
vram_texture = src_subres.surface->id(); vram_texture = src_subres.surface->id();
} }
bool format_mismatch = false;
if (src_subres.is_depth_surface)
{
if (dest_texture)
{
if (dst_is_render_target && !dst_subres.is_depth_surface)
{
LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported");
return true;
}
GLenum internal_fmt;
glBindTexture(GL_TEXTURE_2D, dest_texture);
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&internal_fmt);
if (internal_fmt != (GLenum)src_subres.surface->get_compatible_internal_format())
format_mismatch = true;
}
is_depth_blit = true;
}
//TODO: Check for other types of format mismatch
if (format_mismatch)
{
invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size());
dest_texture = 0;
cached_dest = nullptr;
}
//Validate clip offsets (Persona 4 Arena at 720p) //Validate clip offsets (Persona 4 Arena at 720p)
//Check if can fit //Check if can fit
//NOTE: It is possible that the check is simpler (if (clip_x >= clip_width)) //NOTE: It is possible that the check is simpler (if (clip_x >= clip_width))
@ -1274,8 +1308,9 @@ namespace gl
src_area.y2 += scaled_clip_offset_y; src_area.y2 += scaled_clip_offset_y;
} }
u32 texture_id = m_hw_blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, dst_offset, clip_offset, GLenum dst_format = (is_depth_blit) ? (GLenum)src_subres.surface->get_compatible_internal_format() : (dst_is_argb8) ? GL_RGBA8 : GL_RGB565;
dst_dimensions, clip_dimensions, dst_is_argb8, interpolate); u32 texture_id = m_hw_blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, dst_format, dst_offset, clip_offset,
dst_dimensions, clip_dimensions, dst_is_argb8, is_depth_blit, interpolate);
if (dest_texture) if (dest_texture)
return true; return true;
@ -1292,6 +1327,7 @@ namespace gl
//It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases //It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases
cached.protect(utils::protection::ro); cached.protect(utils::protection::ro);
cached.set_dirty(false); cached.set_dirty(false);
cached.set_depth_flag(is_depth_blit);
return true; return true;
} }

View File

@ -1658,7 +1658,7 @@ bool VKGSRender::check_program_status()
{ {
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface && m_texture_cache.is_depth_texture(texaddr, m_rtts)) if (!surface && m_texture_cache.is_depth_texture(texaddr))
return std::make_tuple(true, 0); return std::make_tuple(true, 0);
} }

View File

@ -386,7 +386,7 @@ namespace vk
return nullptr; return nullptr;
} }
cached_texture_section *find_texture_from_dimensions(u32 rsx_address, u32 rsx_size, u16 width = 0, u16 height = 0, u16 mipmaps = 0) cached_texture_section *find_texture_from_dimensions(u32 rsx_address, u32 /*rsx_size*/, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{ {
auto found = m_cache.find(rsx_address); auto found = m_cache.find(rsx_address);
if (found != m_cache.end()) if (found != m_cache.end())
@ -607,11 +607,8 @@ namespace vk
purge_cache(); purge_cache();
} }
bool is_depth_texture(const u32 texaddr, rsx::vk_render_targets &m_rtts) bool is_depth_texture(const u32 texaddr)
{ {
if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
return true;
reader_lock lock(m_cache_mutex); reader_lock lock(m_cache_mutex);
auto found = m_cache.find(texaddr); auto found = m_cache.find(texaddr);
@ -1127,7 +1124,7 @@ namespace vk
} }
} }
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool /*interpolate*/,
vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue,
rsx::vk_render_targets &m_rtts, vk_data_heap &upload_heap, vk::buffer* upload_buffer) rsx::vk_render_targets &m_rtts, vk_data_heap &upload_heap, vk::buffer* upload_buffer)
{ {
@ -1155,6 +1152,14 @@ namespace vk
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false); auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false);
dst_is_render_target = dst_subres.surface != nullptr; dst_is_render_target = dst_subres.surface != nullptr;
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false);
src_is_render_target = src_subres.surface != nullptr;
//Always use GPU blit if src or dst is in the surface store
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
return false;
u16 max_dst_width = dst.width; u16 max_dst_width = dst.width;
u16 max_dst_height = dst.height; u16 max_dst_height = dst.height;
@ -1273,10 +1278,6 @@ namespace vk
} }
} }
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false);
src_is_render_target = src_subres.surface != nullptr;
//Create source texture if does not exist //Create source texture if does not exist
if (!src_is_render_target) if (!src_is_render_target)
{ {
@ -1362,6 +1363,8 @@ namespace vk
const u32 real_width = dst.pitch / bpp; const u32 real_width = dst.pitch / bpp;
//If src is depth, dest has to be depth as well //If src is depth, dest has to be depth as well
bool format_mismatch = false;
if (src_subres.is_depth_surface) if (src_subres.is_depth_surface)
{ {
if (dest_exists) if (dest_exists)
@ -1376,11 +1379,7 @@ namespace vk
{ {
if (dest_texture->info.format != src_subres.surface->info.format) if (dest_texture->info.format != src_subres.surface->info.format)
{ {
cached_dest->unprotect(); format_mismatch = true;
cached_dest->set_dirty(true);
dest_exists = false;
cached_dest = nullptr;
} }
} }
else else
@ -1402,6 +1401,33 @@ namespace vk
else else
aspect_to_copy = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; aspect_to_copy = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
} }
else
{
if (dest_exists && dest_texture->info.format != dst_vk_format)
{
LOG_ERROR(RSX, "Format mismatch - expected VkFormat 0x%X but found 0x%X instead", (u32)dst_vk_format, (u32)dest_texture->info.format);
format_mismatch = true;
if (dst_is_render_target)
{
if (dst_subres.is_bound)
{
LOG_ERROR(RSX, "Blit destination is an active render target but format does not match. Blit operation ignored.");
return true;
}
m_rtts.invalidate_single_surface(dst_subres.surface, dst_subres.is_depth_surface);
}
}
}
if (format_mismatch)
{
invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size());
dest_exists = false;
cached_dest = nullptr;
}
//Validate clip offsets (Persona 4 Arena at 720p) //Validate clip offsets (Persona 4 Arena at 720p)
//Check if can fit //Check if can fit

View File

@ -643,7 +643,7 @@ namespace rsx
} }
} }
if (g_cfg.video.use_gpu_texture_scaling && dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)
{ {
//For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer //For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer
//TODO: Figure out more instances where we can use this without problems //TODO: Figure out more instances where we can use this without problems