rsx/vk/gl: Refactoring and reimplementation of blit engine

Fix rsx offscreen render-to-display-buffer blit surface reads
- Also, properly scale display output height when reading from a compressed tile

gl: Fix broken dst height computation
- The extra padding is only there to force power-of-2 sizes and isn't used

gl: Ignore compression scaling if output is rendered to in a renderpass

rsx/gl/vk: Cleanup for GPU texture scaling. Initial impl [WIP]
- TODO: Refactor more shared code into RSX/common
kd-11 2017-08-14 00:27:19 +03:00
parent 3e3160d7ac
commit 2033f3f7dc
11 changed files with 664 additions and 296 deletions


@@ -13,6 +13,36 @@ namespace rsx
size_t get_packed_pitch(surface_color_format format, u32 width);
}
template <typename surface_type>
struct surface_subresource_storage
{
surface_type surface = nullptr;
u16 x = 0;
u16 y = 0;
u16 w = 0;
u16 h = 0;
bool is_bound = false;
bool is_depth_surface = false;
bool is_clipped = false;
surface_subresource_storage() {}
surface_subresource_storage(surface_type src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth, bool _Clipped = false)
: surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth), is_clipped(_Clipped)
{}
};
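//Note on the two pitches below: native_pitch is the byte width actually used
//per row (width * bpp), while rsx_pitch is the pitch of the allocation in RSX
//memory and can be larger; when they differ, x coordinates must be rescaled
//by rsx_pitch / native_pitch (the scale_to_fit paths below)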
struct surface_format_info
{
u32 surface_width;
u32 surface_height;
u16 native_pitch;
u16 rsx_pitch;
u8 bpp;
};
/**
* Helper for surface (i.e. color and depth-stencil render target) management.
* It handles surface creation and storage. The backend should only retrieve pointers to surfaces.
@@ -64,6 +94,7 @@ namespace rsx
using surface_type = typename Traits::surface_type;
using command_list_type = typename Traits::command_list_type;
using download_buffer_object = typename Traits::download_buffer_object;
using surface_subresource = surface_subresource_storage<surface_type>;
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
@@ -437,5 +468,179 @@ namespace rsx
for (auto &ds : m_depth_stencil_storage)
Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), nullptr, true);
}
/**
* Clipping and fitting lookup functions
* surface_overlaps_address - returns true if the surface contains the given texture address, and writes the relative x/y position of that address within the surface
* address_is_bound - returns true if the surface at a given address is actively bound
* get_surface_subresource_if_applicable - returns a section descriptor that allows cropping of surfaces stored in memory
*/
bool surface_overlaps_address(surface_type surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit)
{
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
if (surface_address > texaddr)
return false;
u32 offset = texaddr - surface_address;
if (texaddr >= surface_address)
{
if (offset == 0)
{
is_subslice = true;
}
else
{
surface_format_info info;
Traits::get_surface_info(surface, &info);
u32 range = info.rsx_pitch * info.surface_height;
if (offset < range)
{
const u32 y = (offset / info.rsx_pitch);
u32 x = (offset % info.rsx_pitch) / info.bpp;
if (scale_to_fit)
{
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
x = (u32)((f32)x / x_scale);
}
x_offset = x;
y_offset = y;
is_subslice = true;
}
}
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
}
return false;
}
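//Worked example (illustrative values): for a surface at 0xC0000000 with
//rsx_pitch = 5120 and bpp = 4, texaddr = 0xC0014010 gives offset = 0x14010,
//so y = 0x14010 / 5120 = 16 and x = (0x14010 % 5120) / 4 = 4; the requested
//address starts 4 pixels into row 16 of the surface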
bool address_is_bound(u32 address, bool is_depth) const
{
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface : m_bound_render_targets)
{
const u32 bound_address = std::get<0>(surface);
if (bound_address == address)
return true;
}
return false;
}
inline bool region_fits(u16 region_width, u16 region_height, u16 x_offset, u16 y_offset, u16 width, u16 height) const
{
if ((x_offset + width) > region_width) return false;
if ((y_offset + height) > region_height) return false;
return true;
}
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false)
{
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
{
if (surface_overlaps_address(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
{
surface_format_info info;
Traits::get_surface_info(surface, &info);
if (info.rsx_pitch != requested_pitch)
return false;
u16 real_width = requested_width;
if (scale_to_fit)
{
f32 pitch_scaling = (f32)requested_pitch / info.native_pitch;
real_width = (u16)((f32)requested_width / pitch_scaling);
}
if (region_fits(info.surface_width, info.surface_height, x_offset, y_offset, real_width, requested_height))
{
w = info.surface_width;
h = info.surface_height;
clipped = false;
return true;
}
else
{
if (crop) //Forcefully fit the requested region by clipping and scaling
{
u16 remaining_width = info.surface_width - x_offset;
u16 remaining_height = info.surface_height - y_offset;
w = remaining_width;
h = remaining_height;
clipped = true;
return true;
}
if (info.surface_width >= requested_width && info.surface_height >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
w = requested_width;
h = requested_height;
clipped = true;
return true;
}
}
}
return false;
};
surface_type surface = nullptr;
bool clipped = false;
u16 x_offset = 0;
u16 y_offset = 0;
u16 w;
u16 h;
for (auto &tex_info : m_render_targets_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
}
if (ignore_depth_formats)
return{};
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
}
return{};
}
};
}
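A caller-side sketch of the new lookup (illustrative, not part of the diff; m_rtts stands in for a backend's surface store instance):

const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, width, height, pitch, true);
if (rsc.surface)
{
	//(x, y, w, h) locate the requested region inside the stored surface;
	//is_clipped signals that the region had to be shrunk to fit
	if (rsc.is_clipped)
		LOG_WARNING(RSX, "Blit source clipped to %dx%d", rsc.w, rsc.h);
}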


@@ -54,6 +54,18 @@ struct render_target_traits
return rtt;
}
static
void get_surface_info(ID3D12Resource *surface, rsx::surface_format_info *info)
{
//TODO
auto desc = surface->GetDesc();
info->rsx_pitch = desc.Width;
info->native_pitch = desc.Width;
info->surface_width = desc.Width;
info->surface_height = desc.Height;
info->bpp = 1;
}
static
void prepare_rtt_for_drawing(
gsl::not_null<ID3D12GraphicsCommandList*> command_list,


@@ -1028,7 +1028,6 @@ void GLGSRender::flip(int buffer)
// Calculate blit coordinates
coordi aspect_ratio;
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
@@ -1055,19 +1054,33 @@ void GLGSRender::flip(int buffer)
// Find the source image
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address);
m_flip_fbo.recreate();
m_flip_fbo.bind();
if (render_target_texture)
//The frame may have been rendered offscreen and blitted to the display buffer
//Check the texture cache for such a blitted copy
const u32 size = buffer_pitch * buffer_height;
auto surface = m_gl_texture_cache.find_texture_from_range(absolute_address, size);
bool ignore_scaling = false;
if (surface != nullptr)
{
auto dims = surface->get_dimensions();
buffer_width = std::get<0>(dims);
buffer_height = std::get<1>(dims);
m_flip_fbo.color = surface->id();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
__glcheck m_flip_fbo.color = *render_target_texture;
__glcheck m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.color = *render_target_texture;
m_flip_fbo.read_buffer(m_flip_fbo.color);
ignore_scaling = true;
}
else
{
@@ -1077,7 +1090,7 @@ void GLGSRender::flip(int buffer)
{
m_flip_tex_color.recreate(gl::texture::target::texture2D);
__glcheck m_flip_tex_color.config()
m_flip_tex_color.config()
.size({ (int)buffer_width, (int)buffer_height })
.type(gl::texture::type::uint_8_8_8_8)
.format(gl::texture::format::bgra);
@@ -1089,23 +1102,38 @@ void GLGSRender::flip(int buffer)
{
std::unique_ptr<u8[]> temp(new u8[buffer_height * buffer_pitch]);
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
__glcheck m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
else
{
__glcheck m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
m_flip_fbo.color = m_flip_tex_color;
__glcheck m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.read_buffer(m_flip_fbo.color);
ignore_scaling = true;
}
if (!ignore_scaling && buffer_region.tile && buffer_region.tile->comp != CELL_GCM_COMPMODE_DISABLED)
{
LOG_ERROR(RSX, "Output buffer compression mode = 0x%X", buffer_region.tile->comp);
switch (buffer_region.tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X2:
case CELL_GCM_COMPMODE_C32_2X1:
buffer_height = display_buffers[buffer].height / 2;
break;
}
}
// Blit source image to the screen
// Disable scissor test (affects blit)
glDisable(GL_SCISSOR_TEST);
gl::screen.clear(gl::buffers::color_depth_stencil);
__glcheck m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
gl::screen.clear(gl::buffers::color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
if (g_cfg.video.overlay)
{


@@ -1953,6 +1953,12 @@ namespace gl
case texture::target::texture3D: glFramebufferTexture3D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_3D, rhs.id(), rhs.level(), 0); break;
}
}
void operator = (const GLuint rhs)
{
save_binding_state save(m_parent);
glFramebufferTexture2D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_2D, rhs, 0);
}
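//Allows attaching a cached texture by raw GL id (bound as GL_TEXTURE_2D),
//e.g. m_flip_fbo.color = surface->id(); in GLGSRender::flip above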
};
class indexed_attachment : public attachment


@@ -115,40 +115,6 @@ namespace gl
{
return compatible_internal_format;
}
// For an address within the texture, extract this sub-section's rect origin
// Checks whether we need to scale the subresource if it is not handled in shader
// NOTE1: When surface->real_pitch < rsx_pitch, the surface is assumed to have been scaled to fill the rsx_region
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset, bool scale_to_fit)
{
if (!offset)
{
return std::make_tuple(true, 0, 0);
}
if (!surface_height) surface_height = height();
if (!surface_width) surface_width = width();
u32 range = rsx_pitch * surface_height;
if (offset < range)
{
if (!surface_pixel_size)
surface_pixel_size = native_pitch / surface_width;
const u32 y = (offset / rsx_pitch);
u32 x = (offset % rsx_pitch) / surface_pixel_size;
if (scale_to_fit)
{
const f32 x_scale = (f32)rsx_pitch / native_pitch;
x = (u32)((f32)x / x_scale);
}
return std::make_tuple(true, (u16)x, (u16)y);
}
else
return std::make_tuple(false, 0, 0);
}
};
}
@@ -235,6 +201,18 @@ struct gl_render_target_traits
return result;
}
static
void get_surface_info(gl::render_target *surface, rsx::surface_format_info *info)
{
const auto dims = surface->get_dimensions();
info->rsx_pitch = surface->get_rsx_pitch();
info->native_pitch = surface->get_native_pitch();
info->surface_width = std::get<0>(dims);
info->surface_height = std::get<1>(dims);
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
}
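//e.g. (illustrative): a 1280-wide ARGB8 render target reports
//native_pitch = 5120, so bpp = 5120 / 1280 = 4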
static void prepare_rtt_for_drawing(void *, gl::render_target*) {}
static void prepare_rtt_for_sampling(void *, gl::render_target*) {}
@@ -307,169 +285,6 @@ struct gl_render_target_traits
}
};
struct surface_subresource
{
gl::render_target *surface = nullptr;
u16 x = 0;
u16 y = 0;
u16 w = 0;
u16 h = 0;
bool is_bound = false;
bool is_depth_surface = false;
bool is_clipped = false;
surface_subresource() {}
surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth, bool _Clipped = false)
: surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth), is_clipped(_Clipped)
{}
};
class gl_render_targets : public rsx::surface_store<gl_render_target_traits>
{
private:
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit)
{
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
if (surface_address > texaddr)
return false;
u32 offset = texaddr - surface_address;
if (texaddr >= surface_address)
{
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset, scale_to_fit);
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
}
return false;
}
bool is_bound(u32 address, bool is_depth)
{
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface: m_bound_render_targets)
{
const u32 bound_address = std::get<0>(surface);
if (bound_address == address)
return true;
}
return false;
}
bool fits(gl::render_target*, std::pair<u16, u16> &dims, u16 x_offset, u16 y_offset, u16 width, u16 height) const
{
if ((x_offset + width) > dims.first) return false;
if ((y_offset + height) > dims.second) return false;
return true;
}
public:
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit=false, bool crop=false, bool ignore_depth_formats=false)
{
gl::render_target *surface = nullptr;
u16 x_offset = 0;
u16 y_offset = 0;
for (auto &tex_info : m_render_targets_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (scale_to_fit)
{
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
requested_width = (u16)((f32)requested_width / pitch_scaling);
}
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false };
else
{
if (crop) //Forcefully fit the requested region by clipping and scaling
{
u16 remaining_width = dims.first - x_offset;
u16 remaining_height = dims.second - y_offset;
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, false), false, true };
}
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false, true };
}
}
}
}
if (ignore_depth_formats)
return{};
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (scale_to_fit)
{
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
requested_width = (u16)((f32)requested_width / pitch_scaling);
}
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true };
else
{
if (crop) //Forcefully fit the requested region by clipping and scaling
{
u16 remaining_width = dims.first - x_offset;
u16 remaining_height = dims.second - y_offset;
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, true), true, true };
}
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true, true };
}
}
}
}
return {};
}
};


@@ -460,40 +460,6 @@ namespace gl
GLGSRender *m_renderer;
std::thread::id m_renderer_thread;
cached_texture_section *find_texture_from_dimensions(u32 texaddr, u32 w, u32 h)
{
reader_lock lock(m_section_mutex);
for (cached_texture_section &tex : read_only_memory_sections)
{
if (tex.matches(texaddr, w, h) && !tex.is_dirty())
return &tex;
}
return nullptr;
}
/**
* Searches for a texture from read_only memory sections
* Texture origin + size must be a subsection of the existing texture
*/
cached_texture_section *find_texture_from_range(u32 texaddr, u32 range)
{
reader_lock lock(m_section_mutex);
auto test = std::make_pair(texaddr, range);
for (cached_texture_section &tex : read_only_memory_sections)
{
if (tex.get_section_base() > texaddr)
continue;
if (tex.overlaps(test, true) && !tex.is_dirty())
return &tex;
}
return nullptr;
}
cached_texture_section& create_texture(u32 id, u32 texaddr, u32 texsize, u32 w, u32 h)
{
for (cached_texture_section &tex : read_only_memory_sections)
@@ -536,19 +502,6 @@ namespace gl
clear_temporary_surfaces();
}
cached_texture_section* find_cached_rtt_section(u32 base, u32 size)
{
for (cached_texture_section &rtt : no_access_memory_sections)
{
if (rtt.matches(base, size))
{
return &rtt;
}
}
return nullptr;
}
cached_texture_section *create_locked_view_of_section(u32 base, u32 size)
{
cached_texture_section *region = find_cached_rtt_section(base, size);
@@ -647,6 +600,53 @@ namespace gl
m_hw_blitter.destroy();
}
cached_texture_section *find_texture_from_dimensions(u32 texaddr, u32 w, u32 h)
{
reader_lock lock(m_section_mutex);
for (cached_texture_section &tex : read_only_memory_sections)
{
if (tex.matches(texaddr, w, h) && !tex.is_dirty())
return &tex;
}
return nullptr;
}
/**
* Searches for a texture from read_only memory sections
* Texture origin + size must be a subsection of the existing texture
*/
cached_texture_section *find_texture_from_range(u32 texaddr, u32 range)
{
reader_lock lock(m_section_mutex);
auto test = std::make_pair(texaddr, range);
for (cached_texture_section &tex : read_only_memory_sections)
{
if (tex.get_section_base() > texaddr)
continue;
if (tex.overlaps(test, true) && !tex.is_dirty())
return &tex;
}
return nullptr;
}
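//find_texture_from_range is what GLGSRender::flip now uses to pick up an
//offscreen render that was blitted into the display buffer (see above)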
cached_texture_section* find_cached_rtt_section(u32 base, u32 size)
{
for (cached_texture_section &rtt : no_access_memory_sections)
{
if (rtt.matches(base, size))
{
return &rtt;
}
}
return nullptr;
}
template<typename RsxTextureType>
void upload_texture(int index, RsxTextureType &tex, rsx::gl::texture &gl_texture, gl_render_targets &m_rtts)
{
@@ -739,7 +739,7 @@ namespace gl
const f32 internal_scale = (f32)tex_pitch / native_pitch;
const u32 internal_width = (const u32)(tex_width * internal_scale);
const surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true);
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true);
if (rsc.surface)
{
//Check that this region is not cpu-dirty before doing a copy
@@ -1078,7 +1078,7 @@ namespace gl
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
//Check if src/dst are parts of render targets
surface_subresource dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, true);
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, true);
dst_is_render_target = dst_subres.surface != nullptr;
u16 max_dst_width = dst.width;
@@ -1097,7 +1097,8 @@ namespace gl
position2i dst_offset = { dst.offset_x, dst.offset_y };
size2i clip_dimensions = { dst.clip_width, dst.clip_height };
const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.height };
//Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch
const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.clip_height };
//Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
//Reproject final clip onto source...
@@ -1184,7 +1185,7 @@ namespace gl
}
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
surface_subresource src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, true);
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, true);
src_is_render_target = src_subres.surface != nullptr;
//Create source texture if does not exist
@@ -1283,7 +1284,9 @@ namespace gl
//If so, add this texture to the no_access queue, not the read_only queue
writer_lock lock(m_section_mutex);
cached_texture_section &cached = create_texture(texture_id, dst.rsx_address, dst.pitch * dst.clip_height, dst.width, dst.clip_height);
const u8 bpp = dst_is_argb8 ? 4 : 2;
const u32 real_width = dst.pitch / bpp;
cached_texture_section &cached = create_texture(texture_id, dst.rsx_address, dst.pitch * dst.clip_height, real_width, dst.clip_height);
//These textures are completely GPU resident so we don't watch for CPU access
//There's no data to be fetched from the CPU
//It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases


@@ -2080,10 +2080,12 @@ void VKGSRender::prepare_rtts()
for (u8 index : draw_buffers)
{
bound_images.push_back(std::get<1>(m_rtts.m_bound_render_targets[index]));
auto surface = std::get<1>(m_rtts.m_bound_render_targets[index]);
bound_images.push_back(surface);
m_surface_info[index].address = surface_addresses[index];
m_surface_info[index].pitch = surface_pitchs[index];
surface->rsx_pitch = surface_pitchs[index];
if (surface_pitchs[index] <= 64)
{
@@ -2095,10 +2097,12 @@ void VKGSRender::prepare_rtts()
if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
{
bound_images.push_back(std::get<1>(m_rtts.m_bound_depth_stencil));
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
bound_images.push_back(ds);
m_depth_surface_info.address = zeta_address;
m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch();
ds->rsx_pitch = m_depth_surface_info.pitch;
if (m_depth_surface_info.pitch <= 64 && clip_width > m_depth_surface_info.pitch)
m_depth_surface_info.pitch = 0;
@@ -2519,3 +2523,9 @@ void VKGSRender::flip(int buffer)
m_uploads_8k = 0;
m_uploads_16k = 0;
}
bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
return m_texture_cache.upload_scaled_image(src, dst, interpolate, (*m_device), *m_current_command_buffer, m_memory_type_mapping,
m_swap_chain->get_present_queue(), m_rtts, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
}
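This override is the backend half of the new common blit path: an NV3089 transfer can now be handed to the texture cache's upload_scaled_image (shown below) instead of being looped over on the CPU, which is presumably also what makes the display-buffer hack removed from the common blit method (last hunk of this commit) unnecessary.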


@@ -294,6 +294,7 @@ protected:
void flip(int buffer) override;
void do_local_task() override;
bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_notify_memory_unmapped(u32 address_base, u32 size) override;


@@ -20,6 +20,7 @@ namespace vk
{
bool dirty = false;
u16 native_pitch = 0;
u16 rsx_pitch = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
std::unique_ptr<vk::image_view> view;
@@ -171,6 +172,16 @@ namespace rsx
return ds;
}
static
void get_surface_info(vk::render_target *surface, rsx::surface_format_info *info)
{
info->rsx_pitch = surface->rsx_pitch;
info->native_pitch = surface->native_pitch;
info->surface_width = surface->info.extent.width;
info->surface_height = surface->info.extent.height;
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
}
static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);


@@ -94,7 +94,16 @@ namespace vk
if (!width && !height && !mipmaps)
return true;
return (width == this->width && height == this->height && mipmaps == this->mipmaps);
if (width && width != this->width)
return false;
if (height && height != this->height)
return false;
if (mipmaps && mipmaps != this->mipmaps)
return false;
return true;
}
return false;
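//With the new checks, a zero width/height/mipmaps argument acts as a
//wildcard, e.g. matches(addr, w, h, 0) accepts any mipmap count at addr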
@@ -361,6 +370,40 @@ namespace vk
const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated
s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory
cached_texture_section *find_texture_from_range(u32 rsx_address, u32 range)
{
auto test = std::make_pair(rsx_address, range);
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (!tex.is_dirty() && tex.overlaps(test, true))
return &tex;
}
}
return nullptr;
}
cached_texture_section *find_texture_from_dimensions(u32 rsx_address, u32 rsx_size, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
auto found = m_cache.find(rsx_address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{
if (tex.matches(rsx_address, width, height, mipmaps) && !tex.is_dirty())
{
return &tex;
}
}
}
return nullptr;
}
cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
{
@@ -565,7 +608,7 @@ namespace vk
}
template <typename RsxTextureType>
vk::image_view* upload_texture(command_buffer cmd, RsxTextureType &tex, rsx::vk_render_targets &m_rtts, const vk::memory_type_mapping &memory_type_mapping, vk_data_heap& upload_heap, vk::buffer* upload_buffer)
vk::image_view* upload_texture(command_buffer &cmd, RsxTextureType &tex, rsx::vk_render_targets &m_rtts, const vk::memory_type_mapping &memory_type_mapping, vk_data_heap& upload_heap, vk::buffer* upload_buffer)
{
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 range = (u32)get_texture_size(tex);
@@ -1048,5 +1091,270 @@ namespace vk
value.misses --;
}
}
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate,
vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue,
rsx::vk_render_targets &m_rtts, vk_data_heap &upload_heap, vk::buffer* upload_buffer)
{
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag
//TODO: Verify correct behavior
bool src_is_render_target = false;
bool dst_is_render_target = false;
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8);
VkFormat src_vk_format = src_is_argb8 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
vk::image* vram_texture = nullptr;
vk::image* dest_texture = nullptr;
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
//Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, true);
dst_is_render_target = dst_subres.surface != nullptr;
u16 max_dst_width = dst.width;
u16 max_dst_height = dst.height;
//Prepare areas and offsets
//Copy from [src.offset_x, src.offset_y] a region of [clip.width, clip.height]
//Stretch onto [dst.offset_x, dst.offset_y] with clipping performed on the source region
//The implementation here adds the inverse-scaled clip dimensions onto the source to completely bypass the final clipping step
float scale_x = (f32)dst.width / src.width;
float scale_y = (f32)dst.height / src.height;
//Clip offset is unused if the clip offsets are reprojected onto the source
position2i clip_offset = { 0, 0 };//{ dst.clip_x, dst.clip_y };
position2i dst_offset = { dst.offset_x, dst.offset_y };
size2i clip_dimensions = { dst.clip_width, dst.clip_height };
//Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch
const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.clip_height };
//Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
//Reproject final clip onto source...
const u16 src_w = (const u16)((f32)clip_dimensions.width / scale_x);
const u16 src_h = (const u16)((f32)clip_dimensions.height / scale_y);
areai src_area = { 0, 0, src_w, src_h };
areai dst_area = { 0, 0, dst.clip_width, dst.clip_height };
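//e.g. (illustrative): a 640x360 source stretched onto a 1280x720 destination
//with a 1280x720 clip gives scale_x = scale_y = 2.0, so only a 640x360
//region is read back from the source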
//If the destination is neither a render target nor an existing texture in VRAM,
//it's possible that this method is being used to perform a memcpy into RSX memory, so we check
//the parameters. Whenever a simple memcpy can get the job done, use it instead.
//Dai-3-ji Super Robot Taisen, for example, uses this to copy program code to GPU RAM
bool is_memcpy = false;
u32 memcpy_bytes_length = 0;
if (dst_is_argb8 == src_is_argb8 && !dst.swizzled)
{
if ((src.slice_h == 1 && dst.clip_height == 1) ||
(dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch))
{
const u8 bpp = dst_is_argb8 ? 4 : 2;
is_memcpy = true;
memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
}
}
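//e.g. (illustrative): a 256x64 ARGB8 transfer with equal dimensions,
//src.pitch == dst.pitch == 1024 and no swizzle passes the second test, so
//the 64KB block is moved with a single memcpy instead of a GPU blit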
if (!dst_is_render_target)
{
//First check if this surface exists in VRAM with exact dimensions
//Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible
auto cached_dest = find_texture_from_dimensions(dst.rsx_address, dst.pitch * dst.clip_height, dst_dimensions.width, dst_dimensions.height);
//Check for any available region that will fit this one
if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height);
if (cached_dest)
{
//TODO: Verify that the new surface will fit
dest_texture = cached_dest->get_texture().get();
//TODO: Move this code into utils since it is used a lot
const u32 address_offset = dst.rsx_address - cached_dest->get_section_base();
const u16 bpp = dst_is_argb8 ? 4 : 2;
const u16 offset_y = address_offset / dst.pitch;
const u16 offset_x = address_offset % dst.pitch;
dst_offset.x += offset_x / bpp;
dst_offset.y += offset_y;
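//e.g. (illustrative): with dst.pitch = 1024 and bpp = 4, address_offset =
//4160 lands at offset_y = 4 and offset_x = 64, i.e. 16 pixels into row 4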
max_dst_width = cached_dest->get_width();
max_dst_height = cached_dest->get_height();
}
else if (is_memcpy)
{
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
}
else
{
dst_offset.x = dst_subres.x;
dst_offset.y = dst_subres.y;
dest_texture = dst_subres.surface;
max_dst_width = dst_subres.surface->width();
max_dst_height = dst_subres.surface->height();
if (is_memcpy)
{
//Some render target descriptions are actually invalid
//Confirm this is a flushable RTT
const auto rsx_pitch = dst_subres.surface->rsx_pitch;
const auto native_pitch = dst_subres.surface->native_pitch;
if (rsx_pitch <= 64 && native_pitch != rsx_pitch)
{
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
}
}
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, true);
src_is_render_target = src_subres.surface != nullptr;
//Create source texture if does not exist
if (!src_is_render_target)
{
auto preloaded_texture = find_texture_from_dimensions(src_address, src.pitch * src.slice_h, src.width, src.slice_h);
if (preloaded_texture != nullptr)
{
vram_texture = preloaded_texture->get_texture().get();
}
else
{
flush_address(src_address, dev, cmd, memory_types, submit_queue);
writer_lock lock(m_cache_mutex);
//Upload texture from CPU
vk::image *image = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D,
src_vk_format,
src.width, src.slice_h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_2D, src_vk_format,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
cached_texture_section& region = find_cached_texture(src.rsx_address, src.pitch * src.slice_h, true, src.width, src.slice_h, 1);
region.reset(src.rsx_address, src.pitch * src.slice_h);
region.create(src.width, src.slice_h, 1, 1, view, image);
region.protect(utils::protection::ro);
region.set_dirty(false);
read_only_range = region.get_min_max(read_only_range);
vk::enter_uninterruptible();
std::vector<rsx_subresource_layout> layout(1);
auto &subres = layout.back();
subres.width_in_block = src.width;
subres.height_in_block = src.slice_h;
subres.pitch_in_bytes = src.pitch;
subres.depth = 1;
subres.data = {(const gsl::byte*)src.pixels, src.pitch * src.slice_h};
copy_mipmaped_image_using_buffer(cmd, image->value, layout, src_vk_format, false, 1,
upload_heap, upload_buffer);
vk::leave_uninterruptible();
vram_texture = image;
}
}
else
{
if (src_subres.w != clip_dimensions.width ||
src_subres.h != clip_dimensions.height)
{
f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->native_pitch;
dst_area.x2 = (int)(src_subres.w * scale_x * subres_scaling_x);
dst_area.y2 = (int)(src_subres.h * scale_y);
}
src_area.x2 = src_subres.w;
src_area.y2 = src_subres.h;
src_area.x1 += src_subres.x;
src_area.x2 += src_subres.x;
src_area.y1 += src_subres.y;
src_area.y2 += src_subres.y;
vram_texture = src_subres.surface;
}
//Validate clip offsets (Persona 4 Arena at 720p)
//Check if the region can fit
//NOTE: It is possible that the check is simpler (if (clip_x >= clip_width))
//Needs verification
if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0;
if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0;
if (dst.clip_x || dst.clip_y)
{
//Reproject clip offsets onto source
const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / scale_x);
const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / scale_y);
src_area.x1 += scaled_clip_offset_x;
src_area.x2 += scaled_clip_offset_x;
src_area.y1 += scaled_clip_offset_y;
src_area.y2 += scaled_clip_offset_y;
}
bool dest_exists = dest_texture != nullptr;
const VkFormat dst_vk_format = dst_is_argb8 ? VK_FORMAT_R8G8B8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
const u8 bpp = dst_is_argb8 ? 4 : 2;
const u32 real_width = dst.pitch / bpp;
if (!dest_exists)
{
dest_texture = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D,
dst_vk_format,
real_width, dst.clip_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
}
//Copy data
copy_scaled_image(cmd, vram_texture->value, dest_texture->value, vram_texture->current_layout, dest_texture->current_layout,
src_area.x1, src_area.y1, src_w, src_h, dst_area.x1, dst_area.y1, dst.clip_width, dst.clip_height, 1, VK_IMAGE_ASPECT_COLOR_BIT);
if (dest_exists)
return true;
//TODO: Verify if any titles ever scale into CPU memory. It defeats the purpose of uploading data to the GPU, but it could happen
//If so, add this texture to the no_access queue, not the read_only queue
cached_texture_section& region = find_cached_texture(dst.rsx_address, dst.pitch * dst.clip_height, true, real_width, dst.clip_height, 1);
writer_lock lock(m_cache_mutex);
//These textures are completely GPU resident so we don't watch for CPU access
//There's no data to be fetched from the CPU
//It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), dest_texture->value, VK_IMAGE_VIEW_TYPE_2D, dst_vk_format,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
region.reset(dst.rsx_address, dst.pitch * dst.clip_height);
region.create(real_width, dst.clip_height, 1, 1, view, dest_texture);
region.protect(utils::protection::rw);
region.set_dirty(false);
read_only_range = region.get_min_max(read_only_range);
return true;
}
};
}


@@ -580,37 +580,6 @@ namespace rsx
return;
}
if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)
{
//HACK: it's an extension of the flip hack. Remove this once the texture cache is properly implemented
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
u32 begin = rsx->display_buffers[i].offset;
if (dst_offset < begin || !begin)
{
continue;
}
if (rsx->display_buffers[i].width < 720 || rsx->display_buffers[i].height < 480)
{
continue;
}
if (begin == dst_offset)
{
return;
}
u32 end = begin + rsx->display_buffers[i].height * rsx->display_buffers[i].pitch;
if (dst_offset < end)
{
return;
}
}
}
const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel
const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4;