rsx: Implement unaligned surface inheritance with hierarchical contribution

- Allows render targets to behave like stacked 3D views, the same way shader inputs are resolved
- Basically implements most of the 'Read Color/Depth Buffers' option for 'free'.
- Allows splitting RTV/DSV resources if they are superseded by a partial surface
- Also allows intersecting new resources through the surface cache for proper inheritance from other scattered data
- TODO: Refactor bind_surface_as_rtt and bind_surface_as_ds to reduce asinine code duplication
This commit is contained in:
kd-11 2019-05-07 21:51:53 +03:00 committed by kd-11
parent a016728a9e
commit 88c20afd3a
9 changed files with 751 additions and 331 deletions

View File

@ -2,6 +2,7 @@
#include "Utilities/GSL.h"
#include "Emu/Memory/vm.h"
#include "TextureUtils.h"
#include "../GCM.h"
#include "../rsx_utils.h"
#include <list>
@ -59,24 +60,65 @@ namespace rsx
u8 bpp;
};
template <typename image_storage_type>
struct surface_hierachy_info
template <typename surface_type>
struct deferred_clipped_region
{
struct memory_overlap_t
u16 src_x, src_y, dst_x, dst_y, width, height;
f32 transfer_scale_x, transfer_scale_y;
surface_type target;
surface_type source;
template <typename T>
deferred_clipped_region<T> cast() const
{
image_storage_type _ref;
u32 memory_address;
u32 x;
u32 y;
u32 w;
u32 h;
};
deferred_clipped_region<T> ret;
ret.src_x = src_x;
ret.src_y = src_y;
ret.dst_x = dst_x;
ret.dst_y = dst_y;
ret.width = width;
ret.height = height;
ret.transfer_scale_x = transfer_scale_x;
ret.transfer_scale_y = transfer_scale_y;
ret.target = (T)(target);
ret.source = (T)(source);
u32 memory_address;
u32 memory_range;
image_storage_type memory_contents;
return ret;
}
std::vector<memory_overlap_t> overlapping_set;
// A region is considered valid (truthy) only when it has a source surface attached.
operator bool() const
{
	const bool has_source = (source != nullptr);
	return has_source;
}
// Lazily fills in the transfer parameters for a region whose width is still zero.
// The extents and scale factors are taken from rsx::get_transferable_region() for
// target_surface, which also becomes the region's target. A region that already
// has a non-zero width is left untouched.
template <typename T>
void init_transfer(T target_surface)
{
	if (width)
	{
		// Transfer area was already set up
		return;
	}

	// Perform intersection here
	const auto transferable = rsx::get_transferable_region(target_surface);

	// Elements 0/1 are stored as this region's width/height (and used as the scale divisors)
	width = std::get<0>(transferable);
	height = std::get<1>(transferable);

	// Elements 2/3 divided by the stored width/height give the scale factors
	transfer_scale_x = f32(std::get<2>(transferable)) / width;
	transfer_scale_y = f32(std::get<3>(transferable)) / height;

	target = target_surface;
}
// Rectangle read from the source surface: origin (src_x, src_y), extent width x height.
// The region must have been initialized (non-zero width).
areai src_rect() const
{
	verify(HERE), width;

	const auto right = src_x + width;
	const auto bottom = src_y + height;
	return { src_x, src_y, right, bottom };
}
// Rectangle written on the target surface: origin (dst_x, dst_y), extent equal to the
// source extent multiplied by the transfer scale and rounded to the nearest integer.
// The region must have been initialized (non-zero width).
areai dst_rect() const
{
	verify(HERE), width;

	const u16 scaled_w = u16(width * transfer_scale_x + 0.5f);
	const u16 scaled_h = u16(height * transfer_scale_y + 0.5f);
	return { dst_x, dst_y, dst_x + scaled_w, dst_y + scaled_h };
}
};
template <typename image_storage_type>
@ -86,7 +128,7 @@ namespace rsx
std::array<std::pair<u32, u64>, 5> memory_tag_samples;
bool dirty = false;
image_storage_type old_contents = nullptr;
deferred_clipped_region<image_storage_type> old_contents{};
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
GcmTileInfo *tile = nullptr;
@ -142,11 +184,60 @@ namespace rsx
{
if (!other || other->get_rsx_pitch() != this->get_rsx_pitch())
{
old_contents = nullptr;
old_contents = {};
return;
}
old_contents = other;
old_contents = {};
old_contents.source = other;
}
// Stores a clipped transfer region as the pending old contents of this surface.
//   region     - descriptor exposing source/target plus src/dst offsets and width/height;
//                region.source must be non-null (verified below)
//   normalized - when true, the stored offsets and extents are divided by per-surface
//                factors built from bpp and the write AA mode, and the transfer scale
//                is recomputed from those factors
template<typename T>
void set_old_contents_region(const T& region, bool normalized)
{
// NOTE: This method will not perform pitch verification!
verify(HERE), region.source;
// Keep a copy of the descriptor converted to this surface's storage type
old_contents = region.template cast<image_storage_type>();
// Reverse normalization process if needed
if (normalized)
{
// Source-side divisors: bpp (doubled unless the source is single-sampled) horizontally,
// and 2 vertically only for AA modes above diagonal_centered_2_samples
const u16 bytes_to_texels_x = region.source->get_bpp() * (region.source->write_aa_mode == rsx::surface_antialiasing::center_1_sample? 1 : 2);
const u16 rows_to_texels_y = (region.source->write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples? 2 : 1);
old_contents.src_x /= bytes_to_texels_x;
old_contents.src_y /= rows_to_texels_y;
old_contents.width /= bytes_to_texels_x;
old_contents.height /= rows_to_texels_y;
// Destination-side divisors: same formula, evaluated for this surface
const u16 bytes_to_texels_x2 = (get_bpp() * (write_aa_mode == rsx::surface_antialiasing::center_1_sample? 1 : 2));
const u16 rows_to_texels_y2 = (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples)? 2 : 1;
old_contents.dst_x /= bytes_to_texels_x2;
old_contents.dst_y /= rows_to_texels_y2;
// The transfer scale is the ratio of destination divisor to source divisor on each axis
old_contents.transfer_scale_x = f32(bytes_to_texels_x2) / bytes_to_texels_x;
old_contents.transfer_scale_y = f32(rows_to_texels_y2) / rows_to_texels_y;
}
// Apply resolution scale if needed
if (g_cfg.video.resolution_scale_percent != 100)
{
// NOTE(review): old_contents.target is dereferenced in this branch; only region.source is verified above
// The same source extent is scaled twice, once against the source surface dimensions and once
// against the target surface dimensions; the ratio of the results is folded into the transfer scale
// NOTE(review): extents pass `true` and offsets pass `false` as the second argument of
// apply_resolution_scale - the meaning of that flag is not visible here, confirm in rsx_utils
auto src_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.source->width());
auto src_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.source->height());
auto dst_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.target->width());
auto dst_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.target->height());
old_contents.transfer_scale_x *= f32(dst_width) / src_width;
old_contents.transfer_scale_y *= f32(dst_height) / src_height;
// The stored extent becomes the scaled source extent
old_contents.width = src_width;
old_contents.height = src_height;
// Offsets are scaled against their own surface (source for src_*, target for dst_*)
old_contents.src_x = rsx::apply_resolution_scale(old_contents.src_x, false, old_contents.source->width());
old_contents.src_y = rsx::apply_resolution_scale(old_contents.src_y, false, old_contents.source->height());
old_contents.dst_x = rsx::apply_resolution_scale(old_contents.dst_x, false, old_contents.target->width());
old_contents.dst_y = rsx::apply_resolution_scale(old_contents.dst_y, false, old_contents.target->height());
}
}
void queue_tag(u32 address)
@ -207,7 +298,22 @@ namespace rsx
read_aa_mode = write_aa_mode;
dirty = false;
old_contents = nullptr;
old_contents = {};
}
// Returns the rect area occupied by this surface expressed as an 8bpp image with no AA.
// The rect starts at (0, 0); its width is the native pitch and its height is the surface
// height, each doubled when the write AA mode exceeds the respective threshold.
areau get_normalized_memory_area() const
{
	const bool double_columns = (write_aa_mode > rsx::surface_antialiasing::center_1_sample);
	const bool double_rows = (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples);

	const u16 area_width = get_native_pitch() * (double_columns ? 2 : 1);
	const u16 area_height = get_surface_height() * (double_rows ? 2 : 1);
	return { 0, 0, area_width, area_height };
}
// Returns the address range of this surface: it starts at the address of the first
// memory tag sample and spans (row count * rsx pitch) bytes, where the row count is the
// surface height doubled for AA modes above diagonal_centered_2_samples.
rsx::address_range get_memory_range() const
{
	const auto row_multiplier = (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples) ? 2 : 1;
	const u32 total_rows = get_surface_height() * row_multiplier;

	const auto base_address = memory_tag_samples[0].first;
	return rsx::address_range::start_length(base_address, total_rows * get_rsx_pitch());
}
};
@ -257,6 +363,11 @@ namespace rsx
}
}
// Horizontal multiplier for an antialiasing mode: 1 for center_1_sample, 2 for every other mode.
constexpr u32 get_aa_factor_u(surface_antialiasing aa_mode)
{
	if (aa_mode == surface_antialiasing::center_1_sample)
	{
		return 1;
	}

	return 2;
}
constexpr u32 get_aa_factor_v(surface_antialiasing aa_mode)
{
switch (aa_mode)
@ -284,100 +395,251 @@ namespace rsx
rsx::address_range m_depth_stencil_memory_range;
public:
std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {};
std::tuple<u32, surface_type> m_bound_depth_stencil = {};
std::array<std::pair<u32, surface_type>, 4> m_bound_render_targets = {};
std::pair<u32, surface_type> m_bound_depth_stencil = {};
std::list<surface_storage_type> invalidated_resources;
std::vector<surface_hierachy_info<surface_type>> m_memory_tree;
u64 cache_tag = 0ull;
u64 write_tag = 0ull;
u64 memory_tag = 0ull;
surface_store() = default;
~surface_store() = default;
surface_store(const surface_store&) = delete;
private:
void generate_render_target_memory_tree()
template <bool is_depth_surface>
void split_surface_region(command_list_type cmd, u32 address, surface_type prev_surface, u16 width, u16 height, u8 bpp, rsx::surface_antialiasing aa)
{
auto process_entry = [](surface_hierachy_info<surface_type>& block_info,
const surface_format_info& info,
u32 memory_address, u32 memory_end,
u32 address, surface_type surface)
#ifndef INCOMPLETE_SURFACE_CACHE_IMPL
auto insert_new_surface = [&](
u32 new_address,
deferred_clipped_region<surface_type>& region,
std::unordered_map<u32, surface_storage_type>& data)
{
verify(HERE), prev_surface;
if (prev_surface->read_barrier(cmd); !prev_surface->test())
{
if (address <= memory_address) // also intentionally fails on self-test
return;
}
if (address >= memory_end)
return;
surface_format_info info2{};
Traits::get_surface_info(surface, &info2);
const auto offset = (address - memory_address);
const auto offset_y = (offset / info.rsx_pitch);
const auto offset_x = (offset % info.rsx_pitch) / info.bpp;
const auto pitch2 = info2.bpp * info2.surface_width;
const bool fits_w = ((offset % info.rsx_pitch) + pitch2) <= info.rsx_pitch;
const bool fits_h = ((offset_y + info2.surface_height) * info.rsx_pitch) <= (memory_end - memory_address);
if (fits_w && fits_h)
surface_storage_type sink;
if (const auto found = data.find(new_address);
found != data.end())
{
typename surface_hierachy_info<surface_type>::memory_overlap_t overlap{};
overlap._ref = surface;
overlap.memory_address = address;
overlap.x = offset_x;
overlap.y = offset_y;
overlap.w = info2.surface_width;
overlap.h = info2.surface_height;
block_info.overlapping_set.push_back(overlap);
if (Traits::is_compatible_surface(Traits::get(found->second), region.source, region.width, region.height, 1))
{
// There is no need to erase due to the reinsertion below
sink = std::move(found->second);
}
else
{
// TODO
// TODO: Merge the 2 regions
invalidated_resources.push_back(std::move(found->second));
data.erase(new_address);
auto &old = invalidated_resources.back();
Traits::notify_surface_invalidated(old);
}
}
Traits::clone_surface(cmd, sink, region.source, new_address, region);
verify(HERE), region.target == Traits::get(sink);
data[new_address] = std::move(sink);
};
auto process_block = [this, process_entry](u32 memory_address, surface_type surface)
{
surface_hierachy_info<surface_type> block_info;
surface_format_info info{};
Traits::get_surface_info(surface, &info);
const auto memory_end = memory_address + (info.rsx_pitch * info.surface_height);
// Define incoming region
size2u old, _new;
for (const auto &rtt : m_render_targets_storage)
const auto prev_area = prev_surface->get_normalized_memory_area();
old.width = prev_area.x2;
old.height = prev_area.y2;
_new.width = width * bpp * get_aa_factor_u(aa);
_new.height = height * get_aa_factor_v(aa);
if (old.width > _new.width)
{
process_entry(block_info, info, memory_address, memory_end, rtt.first, Traits::get(rtt.second));
// Split in X
const u32 baseaddr = address + _new.width;
const u32 bytes_to_texels_x = (bpp * get_aa_factor_u(prev_surface->write_aa_mode));
deferred_clipped_region<surface_type> copy;
copy.src_x = _new.width / bytes_to_texels_x;
copy.src_y = 0;
copy.dst_x = 0;
copy.dst_y = 0;
copy.width = (old.width - _new.width) / bytes_to_texels_x;
copy.height = prev_surface->get_surface_height();
copy.transfer_scale_x = 1.f;
copy.transfer_scale_y = 1.f;
copy.target = nullptr;
copy.source = prev_surface;
if constexpr (is_depth_surface)
{
insert_new_surface(baseaddr, copy, m_depth_stencil_storage);
}
else
{
insert_new_surface(baseaddr, copy, m_render_targets_storage);
}
}
for (const auto &ds : m_depth_stencil_storage)
if (old.height > _new.height)
{
process_entry(block_info, info, memory_address, memory_end, ds.first, Traits::get(ds.second));
// Split in Y
const u32 baseaddr = address + (_new.height * prev_surface->get_rsx_pitch());
const u32 bytes_to_texels_x = (bpp * get_aa_factor_u(prev_surface->write_aa_mode));
deferred_clipped_region<surface_type> copy;
copy.src_x = 0;
copy.src_y = _new.height / get_aa_factor_v(prev_surface->write_aa_mode);
copy.dst_x = 0;
copy.dst_y = 0;
copy.width = std::min(_new.width, old.width) / bytes_to_texels_x;
copy.height = (old.height - _new.height) / get_aa_factor_v(prev_surface->write_aa_mode);
copy.transfer_scale_x = 1.f;
copy.transfer_scale_y = 1.f;
copy.target = nullptr;
copy.source = prev_surface;
if constexpr (is_depth_surface)
{
insert_new_surface(baseaddr, copy, m_depth_stencil_storage);
}
else
{
insert_new_surface(baseaddr, copy, m_render_targets_storage);
}
}
#endif
}
if (!block_info.overlapping_set.empty())
template <bool is_depth_surface>
void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface)
{
block_info.memory_address = memory_address;
block_info.memory_range = (memory_end - memory_address);
block_info.memory_contents = surface;
#ifndef INCOMPLETE_SURFACE_CACHE_IMPL
auto scan_list = [&new_surface](const rsx::address_range& mem_range,
std::unordered_map<u32, surface_storage_type>& data) -> std::vector<std::pair<u32, surface_type>>
{
std::vector<std::pair<u32, surface_type>> result;
for (const auto &e : data)
{
auto surface = Traits::get(e.second);
m_memory_tree.push_back(block_info);
if (new_surface == surface || e.second->dirty || e.second->last_use_tag <= new_surface->last_use_tag)
{
// Do not bother synchronizing with uninitialized data
continue;
}
// Memory partition check
if (mem_range.start >= 0xc0000000)
{
if (e.first < 0xc0000000) continue;
}
else
{
if (e.first >= 0xc0000000) continue;
}
// Pitch check
if (!rsx::pitch_compatible(surface, new_surface))
{
continue;
}
// Range check
const rsx::address_range this_range = surface->get_memory_range();
if (!this_range.overlaps(mem_range))
{
continue;
}
result.push_back({ e.first, surface });
}
return result;
};
for (auto &rtt : m_bound_render_targets)
const rsx::address_range mem_range = new_surface->get_memory_range();
const auto list1 = scan_list(mem_range, m_render_targets_storage);
const auto list2 = scan_list(mem_range, m_depth_stencil_storage);
if (list1.empty() && list2.empty())
{
if (const auto address = std::get<0>(rtt))
{
process_block(address, std::get<1>(rtt));
}
return;
}
if (const auto address = std::get<0>(m_bound_depth_stencil))
std::vector<std::pair<u32, surface_type>> surface_info;
if (list1.empty())
{
process_block(address, std::get<1>(m_bound_depth_stencil));
surface_info = std::move(list2);
}
else if (list2.empty())
{
surface_info = std::move(list1);
}
else
{
surface_info = std::move(list1);
surface_info.reserve(list1.size() + list2.size());
for (const auto& e : list2) surface_info.push_back(e);
}
if (UNLIKELY(surface_info.size() > 1))
{
// Sort with newest first for early exit
std::sort(surface_info.begin(), surface_info.end(), [](const auto& a, const auto& b)
{
return (a.second->last_use_tag > b.second->last_use_tag);
});
}
// TODO: Modify deferred_clip_region::direct_copy() to take a few more things into account!
const areau child_region = new_surface->get_normalized_memory_area();
const auto child_w = child_region.width();
const auto child_h = child_region.height();
const auto pitch = new_surface->get_rsx_pitch();
for (const auto &e: surface_info)
{
const auto parent_region = e.second->get_normalized_memory_area();
const auto parent_w = parent_region.width();
const auto parent_h = parent_region.height();
const auto rect = rsx::intersect_region(e.first, parent_w, parent_h, 1, address, child_w, child_h, 1, pitch);
const auto src_offset = std::get<0>(rect);
const auto dst_offset = std::get<1>(rect);
const auto size = std::get<2>(rect);
if (src_offset.x >= parent_w || src_offset.y >= parent_h)
{
continue;
}
if (dst_offset.x >= child_w || dst_offset.y >= child_h)
{
continue;
}
// TODO: Eventually need to stack all the overlapping regions, but for now just do the latest rect in the space
deferred_clipped_region<surface_type> region;
region.src_x = src_offset.x;
region.src_y = src_offset.y;
region.dst_x = dst_offset.x;
region.dst_y = dst_offset.y;
region.width = size.width;
region.height = size.height;
region.source = e.second;
region.target = new_surface;
new_surface->set_old_contents_region(region, true);
new_surface->dirty = true;
break;
}
#endif
}
protected:
@ -401,46 +663,48 @@ namespace rsx
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
surface_type convert_surface = nullptr;
// Remove any depth surfaces occupying this memory address (TODO: Discard all overlapping range)
auto aliased_depth_surface = m_depth_stencil_storage.find(address);
if (aliased_depth_surface != m_depth_stencil_storage.end())
{
Traits::notify_surface_invalidated(aliased_depth_surface->second);
convert_surface = Traits::get(aliased_depth_surface->second);
invalidated_resources.push_back(std::move(aliased_depth_surface->second));
m_depth_stencil_storage.erase(aliased_depth_surface);
}
bool store = true;
// Check if render target already exists
auto It = m_render_targets_storage.find(address);
if (It != m_render_targets_storage.end())
{
surface_storage_type &rtt = It->second;
if (Traits::rtt_has_format_width_height(rtt, color_format, width, height))
{
if (Traits::surface_is_pitch_compatible(rtt, pitch))
Traits::notify_surface_persist(rtt);
else
Traits::invalidate_surface_contents(command_list, Traits::get(rtt), nullptr, address, pitch);
const bool pitch_compatible = Traits::surface_is_pitch_compatible(rtt, pitch);
Traits::prepare_rtt_for_drawing(command_list, Traits::get(rtt));
return Traits::get(rtt);
if (pitch_compatible)
{
// Preserve memory outside the area to be inherited if needed
const u8 bpp = get_format_block_size_in_bytes(color_format);
split_surface_region<false>(command_list, address, Traits::get(rtt), (u16)width, (u16)height, bpp, antialias);
}
if (Traits::rtt_has_format_width_height(rtt, color_format, width, height))
{
if (pitch_compatible)
Traits::notify_surface_persist(rtt);
else
Traits::invalidate_surface_contents(command_list, Traits::get(rtt), address, pitch);
Traits::prepare_rtt_for_drawing(command_list, Traits::get(rtt));
new_surface = Traits::get(rtt);
store = false;
}
else
{
old_surface = Traits::get(rtt);
old_surface_storage = std::move(rtt);
m_render_targets_storage.erase(address);
}
}
if (!new_surface)
{
// Range test
const auto aa_factor_v = get_aa_factor_v(antialias);
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v));
m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range);
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
// Search invalidated resources for a suitable surface
for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
@ -456,16 +720,20 @@ namespace rsx
rtt = std::move(old_surface_storage);
}
else
{
// rtt is now empty - erase it
invalidated_resources.erase(It);
}
new_surface = Traits::get(new_surface_storage);
Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy, address, pitch);
Traits::invalidate_surface_contents(command_list, new_surface, address, pitch);
Traits::prepare_rtt_for_drawing(command_list, new_surface);
break;
}
}
}
// Check for stale storage
if (old_surface != nullptr && new_surface == nullptr)
{
// This was already determined to be invalid and is excluded from testing above
@ -473,15 +741,46 @@ namespace rsx
invalidated_resources.push_back(std::move(old_surface_storage));
}
if (new_surface != nullptr)
if (!new_surface)
{
//New surface was found among existing surfaces
m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, pitch, std::forward<Args>(extra_params)...);
new_surface = Traits::get(m_render_targets_storage[address]);
}
else if (store)
{
// New surface was found among invalidated surfaces
m_render_targets_storage[address] = std::move(new_surface_storage);
return new_surface;
}
m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, pitch, contents_to_copy, std::forward<Args>(extra_params)...);
return Traits::get(m_render_targets_storage[address]);
#ifndef INCOMPLETE_SURFACE_CACHE_IMPL
// Check if old_surface is 'new' and avoid intersection
if (old_surface && old_surface->last_use_tag == write_tag)
{
new_surface->set_old_contents(old_surface);
}
else
#endif
{
intersect_surface_region<false>(command_list, address, new_surface);
}
// Remove and preserve if possible any overlapping/replaced depth surface
auto aliased_depth_surface = m_depth_stencil_storage.find(address);
if (aliased_depth_surface != m_depth_stencil_storage.end())
{
if (Traits::surface_is_pitch_compatible(aliased_depth_surface->second, pitch))
{
// Preserve memory outside the area to be inherited if needed
const u8 bpp = get_format_block_size_in_bytes(color_format);
split_surface_region<true>(command_list, address, Traits::get(aliased_depth_surface->second), (u16)width, (u16)height, bpp, antialias);
}
Traits::notify_surface_invalidated(aliased_depth_surface->second);
invalidated_resources.push_back(std::move(aliased_depth_surface->second));
m_depth_stencil_storage.erase(aliased_depth_surface);
}
return new_surface;
}
template <typename ...Args>
@ -497,46 +796,46 @@ namespace rsx
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
surface_type convert_surface = nullptr;
// Remove any color surfaces occupying this memory range (TODO: Discard all overlapping surfaces)
auto aliased_rtt_surface = m_render_targets_storage.find(address);
if (aliased_rtt_surface != m_render_targets_storage.end())
{
Traits::notify_surface_invalidated(aliased_rtt_surface->second);
convert_surface = Traits::get(aliased_rtt_surface->second);
invalidated_resources.push_back(std::move(aliased_rtt_surface->second));
m_render_targets_storage.erase(aliased_rtt_surface);
}
bool store = true;
auto It = m_depth_stencil_storage.find(address);
if (It != m_depth_stencil_storage.end())
{
surface_storage_type &ds = It->second;
if (Traits::ds_has_format_width_height(ds, depth_format, width, height))
{
if (Traits::surface_is_pitch_compatible(ds, pitch))
Traits::notify_surface_persist(ds);
else
Traits::invalidate_surface_contents(command_list, Traits::get(ds), nullptr, address, pitch);
const bool pitch_compatible = Traits::surface_is_pitch_compatible(ds, pitch);
Traits::prepare_ds_for_drawing(command_list, Traits::get(ds));
return Traits::get(ds);
if (pitch_compatible)
{
const u8 bpp = (depth_format == rsx::surface_depth_format::z16)? 2 : 4;
split_surface_region<true>(command_list, address, Traits::get(ds), (u16)width, (u16)height, bpp, antialias);
}
if (Traits::ds_has_format_width_height(ds, depth_format, width, height))
{
if (pitch_compatible)
Traits::notify_surface_persist(ds);
else
Traits::invalidate_surface_contents(command_list, Traits::get(ds), address, pitch);
Traits::prepare_ds_for_drawing(command_list, Traits::get(ds));
new_surface = Traits::get(ds);
store = false;
}
else
{
old_surface = Traits::get(ds);
old_surface_storage = std::move(ds);
m_depth_stencil_storage.erase(address);
}
}
if (!new_surface)
{
// Range test
const auto aa_factor_v = get_aa_factor_v(antialias);
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v));
m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range);
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
//Search invalidated resources for a suitable surface
for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
@ -556,10 +855,11 @@ namespace rsx
new_surface = Traits::get(new_surface_storage);
Traits::prepare_ds_for_drawing(command_list, new_surface);
Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy, address, pitch);
Traits::invalidate_surface_contents(command_list, new_surface, address, pitch);
break;
}
}
}
if (old_surface != nullptr && new_surface == nullptr)
{
@ -568,15 +868,45 @@ namespace rsx
invalidated_resources.push_back(std::move(old_surface_storage));
}
if (new_surface != nullptr)
if (!new_surface)
{
//New surface was found among existing surfaces
m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, pitch, std::forward<Args>(extra_params)...);
new_surface = Traits::get(m_depth_stencil_storage[address]);
}
else if (store)
{
// New surface was found among invalidated surfaces
m_depth_stencil_storage[address] = std::move(new_surface_storage);
return new_surface;
}
m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, pitch, contents_to_copy, std::forward<Args>(extra_params)...);
return Traits::get(m_depth_stencil_storage[address]);
#ifndef INCOMPLETE_SURFACE_CACHE_IMPL
// Check if old_surface is 'new' and avoid intersection
if (old_surface && old_surface->last_use_tag == write_tag)
{
new_surface->set_old_contents(old_surface);
}
else
#endif
{
intersect_surface_region<true>(command_list, address, new_surface);
}
// Remove and preserve if possible any overlapping/replaced color surface
auto aliased_rtt_surface = m_render_targets_storage.find(address);
if (aliased_rtt_surface != m_render_targets_storage.end())
{
if (Traits::surface_is_pitch_compatible(aliased_rtt_surface->second, pitch))
{
const u8 bpp = (depth_format == rsx::surface_depth_format::z16) ? 2 : 4;
split_surface_region<false>(command_list, address, Traits::get(aliased_rtt_surface->second), (u16)width, (u16)height, bpp, antialias);
}
Traits::notify_surface_invalidated(aliased_rtt_surface->second);
invalidated_resources.push_back(std::move(aliased_rtt_surface->second));
m_render_targets_storage.erase(aliased_rtt_surface);
}
return new_surface;
}
public:
/**
@ -600,14 +930,13 @@ namespace rsx
// u32 clip_y = clip_vertical_reg;
cache_tag = rsx::get_shared_tag();
m_memory_tree.clear();
// Make previous RTTs sampleable
for (std::tuple<u32, surface_type> &rtt : m_bound_render_targets)
for (auto &rtt : m_bound_render_targets)
{
if (std::get<1>(rtt) != nullptr)
Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt));
rtt = std::make_tuple(0, nullptr);
rtt = std::make_pair(0, nullptr);
}
// Create/Reuse requested rtts
@ -616,7 +945,7 @@ namespace rsx
if (surface_addresses[surface_index] == 0)
continue;
m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index],
m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index],
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias,
clip_width, clip_height, surface_pitch[surface_index], std::forward<Args>(extra_params)...));
}
@ -625,12 +954,12 @@ namespace rsx
if (std::get<1>(m_bound_depth_stencil) != nullptr)
Traits::prepare_ds_for_sampling(command_list, std::get<1>(m_bound_depth_stencil));
m_bound_depth_stencil = std::make_tuple(0, nullptr);
m_bound_depth_stencil = std::make_pair(0, nullptr);
if (!address_z)
return;
m_bound_depth_stencil = std::make_tuple(address_z,
m_bound_depth_stencil = std::make_pair(address_z,
bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias,
clip_width, clip_height, zeta_pitch, std::forward<Args>(extra_params)...));
}
@ -1054,49 +1383,48 @@ namespace rsx
{
write_tag = cache_tag;
}
// Tag all available surfaces
for (int i = 0; i < m_bound_render_targets.size(); ++i)
{
// Usually only 1 or 2 buffers are bound anyway
if (LIKELY(!m_bound_render_targets[i].first))
{
if (i) break;
// B-surface binding
continue;
}
if (memory_tag != cache_tag)
{
generate_render_target_memory_tree();
memory_tag = cache_tag;
m_bound_render_targets[i].second->on_write(write_tag);
}
if (!m_memory_tree.empty())
if (m_bound_depth_stencil.first)
{
for (auto &e : m_memory_tree)
m_bound_depth_stencil.second->on_write(write_tag);
}
}
else
{
if (address && e.memory_address != address)
for (int i = 0; i < m_bound_render_targets.size(); ++i)
{
if (LIKELY(!m_bound_render_targets[i].first))
{
if (i) break;
continue;
}
if (m_bound_render_targets[i].first != address)
{
continue;
}
for (auto &entry : e.overlapping_set)
{
// GPU-side contents changed
entry._ref->dirty = true;
}
}
m_bound_render_targets[i].second->on_write(write_tag);
}
for (auto &rtt : m_bound_render_targets)
if (m_bound_depth_stencil.first == address)
{
if (address && std::get<0>(rtt) != address)
{
continue;
}
if (auto surface = std::get<1>(rtt))
{
surface->on_write(write_tag);
}
}
if (auto ds = std::get<1>(m_bound_depth_stencil))
{
if (!address || std::get<0>(m_bound_depth_stencil) == address)
{
ds->on_write(write_tag);
m_bound_depth_stencil.second->on_write(write_tag);
}
}
}

View File

@ -1750,8 +1750,11 @@ namespace rsx
// Intersect this resource with the original one
const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format());
const auto normalized_width = (section->get_width() * section_bpp) / bpp;
const auto clipped = rsx::intersect_region(address, slice_w, slice_h, bpp,
section->get_section_base(), normalized_width, section->get_height(), section_bpp, pitch);
const auto clipped = rsx::intersect_region(
section->get_section_base(), normalized_width, section->get_height(), section_bpp, /* parent region (extractee) */
address, slice_w, slice_h, bpp, /* child region (extracted) */
pitch);
// Rect intersection test
// TODO: Make the intersection code cleaner with proper 2D regions

View File

@ -1,5 +1,7 @@
#pragma once
#define INCOMPLETE_SURFACE_CACHE_IMPL
#include <utility>
#include <d3d12.h>
#include "d3dx12.h"
@ -25,7 +27,6 @@ struct render_target_traits
ComPtr<ID3D12Resource> create_new_surface(
u32 address,
surface_color_format color_format, size_t width, size_t height, size_t /*pitch*/,
ID3D12Resource* /*old*/,
ID3D12Device* device, const std::array<float, 4> &clear_color, float, u8)
{
DXGI_FORMAT dxgi_format = get_color_surface_format(color_format);
@ -86,7 +87,6 @@ struct render_target_traits
ComPtr<ID3D12Resource> create_new_surface(
u32 address,
surface_depth_format surfaceDepthFormat, size_t width, size_t height, size_t /*pitch*/,
ID3D12Resource* /*old*/,
ID3D12Device* device, const std::array<float, 4>& , float clear_depth, u8 clear_stencil)
{
D3D12_CLEAR_VALUE clear_depth_value = {};
@ -131,7 +131,7 @@ struct render_target_traits
static
void invalidate_surface_contents(
ID3D12GraphicsCommandList*,
ID3D12Resource*, ID3D12Resource*,
ID3D12Resource*,
u32, size_t)
{}

View File

@ -214,19 +214,19 @@ void GLGSRender::end()
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
// Handle special memory barrier for ARGB8->D24S8 in an active DSV
if (ds && ds->old_contents != nullptr &&
ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8 &&
rsx::pitch_compatible(ds, static_cast<gl::render_target*>(ds->old_contents)))
if (ds && ds->old_contents &&
ds->old_contents.source->get_internal_format() == gl::texture::internal_format::rgba8 &&
rsx::pitch_compatible(ds, gl::as_rtt(ds->old_contents.source)))
{
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// TODO: Stencil transfer
gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
ds->old_contents.init_transfer(ds);
const auto region = rsx::get_transferable_region(ds);
m_depth_converter.run({0, 0, std::get<0>(region), std::get<1>(region)},
{0, 0, std::get<2>(region), std::get<3>(region)},
ds->old_contents, ds);
m_depth_converter.run(ds->old_contents.src_rect(),
ds->old_contents.dst_rect(),
ds->old_contents.source, ds);
ds->on_write();
}
@ -400,7 +400,7 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> draw_start = textures_end;
// Optionally do memory synchronization if the texture stage has not yet triggered this
if (g_cfg.video.strict_rendering_mode)
if (1)//g_cfg.video.strict_rendering_mode)
{
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

View File

@ -215,7 +215,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
return;
}
m_rtts.prepare_render_target(nullptr,
gl::command_context cmd{ gl_state };
m_rtts.prepare_render_target(cmd,
layout.color_format, layout.depth_format,
layout.width, layout.height,
layout.target, layout.aa_mode,
@ -234,8 +235,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format);
const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4);
gl::command_context cmd{ gl_state };
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers)
@ -606,7 +605,7 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
return;
}
auto src_texture = static_cast<gl::render_target*>(old_contents);
auto src_texture = gl::as_rtt(old_contents.source);
if (!rsx::pitch_compatible(this, src_texture))
{
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory");
@ -617,8 +616,6 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
const auto dst_bpp = get_bpp();
rsx::typeless_xfer typeless_info{};
const auto region = rsx::get_transferable_region(this);
if (get_internal_format() == src_texture->get_internal_format())
{
// Copy data from old contents onto this one
@ -639,9 +636,11 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
}
const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth);
gl::g_hw_blitter->scale_image(cmd, old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
old_contents.init_transfer(this);
gl::g_hw_blitter->scale_image(cmd, old_contents.source, this,
old_contents.src_rect(),
old_contents.dst_rect(),
!dst_is_depth, dst_is_depth, typeless_info);
// Memory has been transferred, discard old contents and update memory flags

View File

@ -149,21 +149,26 @@ namespace gl
{
using gl::fbo::fbo;
};
/**
 * Downcasts a generic GL texture pointer to the render target type.
 * No runtime type check is performed: the caller must only pass a pointer
 * that actually refers to a gl::render_target (or nullptr).
 */
static inline gl::render_target* as_rtt(gl::texture* t)
{
	// static_cast (instead of reinterpret_cast) lets the compiler apply any
	// base-to-derived pointer adjustment and rejects unrelated types at
	// compile time. This matches what vk::as_rtt does for the Vulkan backend.
	return static_cast<gl::render_target*>(t);
}
}
struct gl_render_target_traits
{
using surface_storage_type = std::unique_ptr<gl::render_target>;
using surface_type = gl::render_target*;
using command_list_type = void*;
using command_list_type = gl::command_context&;
using download_buffer_object = std::vector<u8>;
using barrier_descriptor_t = rsx::deferred_clipped_region<gl::render_target*>;
static
std::unique_ptr<gl::render_target> create_new_surface(
u32 address,
rsx::surface_color_format surface_color_format,
size_t width, size_t height, size_t pitch,
gl::render_target* old_surface
size_t width, size_t height, size_t pitch
)
{
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
@ -176,7 +181,6 @@ struct gl_render_target_traits
std::array<GLenum, 4> native_layout = { (GLenum)format.swizzle.a, (GLenum)format.swizzle.r, (GLenum)format.swizzle.g, (GLenum)format.swizzle.b };
result->set_native_component_layout(native_layout);
result->set_old_contents(old_surface);
result->set_cleared(false);
result->queue_tag(address);
@ -187,8 +191,7 @@ struct gl_render_target_traits
std::unique_ptr<gl::render_target> create_new_surface(
u32 address,
rsx::surface_depth_format surface_depth_format,
size_t width, size_t height, size_t pitch,
gl::render_target* old_surface
size_t width, size_t height, size_t pitch
)
{
auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format);
@ -203,13 +206,47 @@ struct gl_render_target_traits
result->set_native_pitch(native_pitch);
result->set_surface_dimensions((u16)width, (u16)height, (u16)pitch);
result->set_native_component_layout(native_layout);
result->set_old_contents(old_surface);
result->set_cleared(false);
result->queue_tag(address);
return result;
}
/**
 * Prepares 'sink' as a surface that inherits the region described by 'prev',
 * using 'ref' as the template for format and layout.
 *
 * If 'sink' is empty, a new render target is allocated with ref's internal format
 * and with prev.width x prev.height passed through rsx::apply_resolution_scale
 * (ref's surface dimensions are supplied as the reference argument).
 * In all cases prev.target is pointed at the sink and the sink's pitch, dimensions,
 * component layout, tag and old-contents region are (re)initialised from prev/ref.
 *
 * The command context parameter is unused by this backend.
 */
static
void clone_surface(
	gl::command_context&,
	std::unique_ptr<gl::render_target>& sink, gl::render_target* ref,
	u32 address, barrier_descriptor_t& prev)
{
	if (sink == nullptr)
	{
		// No reusable storage was supplied; allocate one matching the reference format
		const auto sized_format = static_cast<GLenum>(ref->get_internal_format());
		const auto scaled_width = rsx::apply_resolution_scale(prev.width, true, ref->get_surface_width());
		const auto scaled_height = rsx::apply_resolution_scale(prev.height, true, ref->get_surface_height());

		sink = std::make_unique<gl::render_target>(scaled_width, scaled_height, sized_format);
	}

	// The descriptor must reference the sink before it is handed to set_old_contents_region below
	gl::render_target* const target = sink.get();
	prev.target = target;

	// Geometry and layout: dimensions come from the clipped region, format properties from the reference
	target->set_native_pitch(prev.width * ref->get_bpp());
	target->set_surface_dimensions(prev.width, prev.height, ref->get_rsx_pitch());
	target->set_native_component_layout(ref->get_native_component_layout());

	// Tagging
	target->queue_tag(address);
	target->sync_tag();

	// Inherited contents and state flags
	target->set_old_contents_region(prev, false);
	target->set_cleared(false);
	target->last_use_tag = ref->last_use_tag;
}
/**
 * Returns true when 'surface' has the same internal format as 'ref'
 * and its surface dimensions equal width x height.
 * The sample count argument is ignored.
 */
static
bool is_compatible_surface(const gl::render_target* surface, const gl::render_target* ref, u16 width, u16 height, u8 /*sample_count*/)
{
	if (surface->get_internal_format() != ref->get_internal_format())
	{
		return false;
	}

	if (surface->get_surface_width() != width)
	{
		return false;
	}

	return surface->get_surface_height() == height;
}
static
void get_surface_info(gl::render_target *surface, rsx::surface_format_info *info)
{
@ -220,11 +257,11 @@ struct gl_render_target_traits
info->bpp = surface->get_bpp();
}
static void prepare_rtt_for_drawing(void *, gl::render_target *rtt) { rtt->reset_refs(); }
static void prepare_rtt_for_sampling(void *, gl::render_target*) {}
// Called before a color target is bound for drawing; resets the target's reference counters.
// The command context is not used by this backend.
static void prepare_rtt_for_drawing(gl::command_context&, gl::render_target *rtt)
{
	rtt->reset_refs();
}
// Hook invoked before a color target is sampled; intentionally a no-op for this backend.
static void prepare_rtt_for_sampling(gl::command_context&, gl::render_target*)
{
}
static void prepare_ds_for_drawing(void *, gl::render_target *ds) { ds->reset_refs(); }
static void prepare_ds_for_sampling(void *, gl::render_target*) {}
// Called before a depth-stencil target is bound for drawing; resets the target's reference counters.
// The command context is not used by this backend.
static void prepare_ds_for_drawing(gl::command_context&, gl::render_target *ds)
{
	ds->reset_refs();
}
// Hook invoked before a depth-stencil target is sampled; intentionally a no-op for this backend.
static void prepare_ds_for_sampling(gl::command_context&, gl::render_target*)
{
}
static
bool surface_is_pitch_compatible(const std::unique_ptr<gl::render_target> &surface, size_t pitch)
@ -233,10 +270,9 @@ struct gl_render_target_traits
}
static
void invalidate_surface_contents(void *, gl::render_target *surface, gl::render_target* old_surface, u32 address, size_t pitch)
void invalidate_surface_contents(gl::command_context&, gl::render_target *surface, u32 address, size_t pitch)
{
surface->set_rsx_pitch((u16)pitch);
surface->set_old_contents(old_surface);
surface->reset_aa_mode();
surface->queue_tag(address);
surface->set_cleared(false);

View File

@ -1459,8 +1459,8 @@ void VKGSRender::end()
// Check for data casts
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && ds->old_contents &&
ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM &&
rsx::pitch_compatible(ds, static_cast<vk::render_target*>(ds->old_contents)))
ds->old_contents.source->info.format == VK_FORMAT_B8G8R8A8_UNORM &&
rsx::pitch_compatible(ds, vk::as_rtt(ds->old_contents.source)))
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
@ -1475,11 +1475,11 @@ void VKGSRender::end()
vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
// TODO: Stencil transfer
const auto region = rsx::get_transferable_region(ds);
ds->old_contents.init_transfer(ds);
m_depth_converter->run(*m_current_command_buffer,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region), std::get<3>(region) },
static_cast<vk::render_target*>(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector),
ds->old_contents.src_rect(),
ds->old_contents.dst_rect(),
vk::as_rtt(ds->old_contents.source)->get_view(0xAAE4, rsx::default_remap_vector),
ds, render_pass, m_framebuffers_to_clean);
// TODO: Flush management to avoid pass running out of ubo space (very unlikely)
@ -1827,7 +1827,7 @@ void VKGSRender::end()
}
// Apply write memory barriers
if (g_cfg.video.strict_rendering_mode)
if (1)//g_cfg.video.strict_rendering_mode)
{
if (ds) ds->write_barrier(*m_current_command_buffer);
@ -2976,13 +2976,13 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
return;
}
m_rtts.prepare_render_target(&*m_current_command_buffer,
m_rtts.prepare_render_target(*m_current_command_buffer,
layout.color_format, layout.depth_format,
layout.width, layout.height,
layout.target, layout.aa_mode,
layout.color_addresses, layout.zeta_address,
layout.actual_color_pitch, layout.actual_zeta_pitch,
(*m_device), &*m_current_command_buffer);
(*m_device), *m_current_command_buffer);
// Reset framebuffer information
VkFormat old_format = VK_FORMAT_UNDEFINED;

View File

@ -99,7 +99,7 @@ namespace vk
return;
}
auto src_texture = static_cast<vk::render_target*>(old_contents);
auto src_texture = static_cast<vk::render_target*>(old_contents.source);
if (!rsx::pitch_compatible(this, src_texture))
{
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory");
@ -110,8 +110,6 @@ namespace vk
const auto dst_bpp = get_bpp();
rsx::typeless_xfer typeless_info{};
const auto region = rsx::get_transferable_region(this);
if (src_texture->info.format == info.format)
{
verify(HERE), src_bpp == dst_bpp;
@ -130,9 +128,11 @@ namespace vk
}
vk::blitter hw_blitter;
hw_blitter.scale_image(cmd, old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
old_contents.init_transfer(this);
hw_blitter.scale_image(cmd, old_contents.source, this,
old_contents.src_rect(),
old_contents.dst_rect(),
/*linear?*/false, /*depth?(unused)*/false, typeless_info);
on_write();
@ -152,6 +152,11 @@ namespace vk
: framebuffer(dev, pass, width, height, std::move(atts))
{}
};
/**
 * Downcasts a generic vk::image pointer to vk::render_target.
 * This is an unchecked static downcast: the caller is responsible for passing
 * a pointer that really refers to a vk::render_target (or nullptr).
 */
static inline vk::render_target* as_rtt(vk::image* t)
{
	auto* const as_render_target = static_cast<vk::render_target*>(t);
	return as_render_target;
}
}
namespace rsx
@ -160,15 +165,15 @@ namespace rsx
{
using surface_storage_type = std::unique_ptr<vk::render_target>;
using surface_type = vk::render_target*;
using command_list_type = vk::command_buffer*;
using command_list_type = vk::command_buffer&;
using download_buffer_object = void*;
using barrier_descriptor_t = rsx::deferred_clipped_region<vk::render_target*>;
static std::unique_ptr<vk::render_target> create_new_surface(
u32 address,
surface_color_format format,
size_t width, size_t height, size_t pitch,
vk::render_target* old_surface,
vk::render_device &device, vk::command_buffer *cmd)
vk::render_device &device, vk::command_buffer& cmd)
{
auto fmt = vk::get_compatible_surface_format(format);
VkFormat requested_format = fmt.first;
@ -185,14 +190,13 @@ namespace rsx
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT,
0));
change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
change_image_layout(cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
rtt->native_component_map = fmt.second;
rtt->rsx_pitch = (u16)pitch;
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format);
rtt->surface_width = (u16)width;
rtt->surface_height = (u16)height;
rtt->set_old_contents(old_surface);
rtt->queue_tag(address);
rtt->dirty = true;
@ -203,8 +207,7 @@ namespace rsx
u32 address,
surface_depth_format format,
size_t width, size_t height, size_t pitch,
vk::render_target* old_surface,
vk::render_device &device, vk::command_buffer *cmd)
vk::render_device &device, vk::command_buffer& cmd)
{
VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format);
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT);
@ -227,7 +230,7 @@ namespace rsx
0));
ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R };
change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
change_image_layout(cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
ds->native_pitch = (u16)width * 2;
if (format == rsx::surface_depth_format::z24s8)
@ -237,13 +240,58 @@ namespace rsx
ds->rsx_pitch = (u16)pitch;
ds->surface_width = (u16)width;
ds->surface_height = (u16)height;
ds->set_old_contents(old_surface);
ds->queue_tag(address);
ds->dirty = true;
return ds;
}
/**
 * Prepares 'sink' as a surface that inherits the region described by 'prev',
 * using 'ref' as the template for format, usage and layout properties.
 *
 * If 'sink' is empty, a new device-local 2D image is created on the device that owns
 * cmd's command pool, with ref's format/usage/flags and with prev.width x prev.height
 * passed through rsx::apply_resolution_scale (ref's surface dimensions are supplied as
 * the reference argument).
 * In all cases prev.target is pointed at the sink, the sink's bookkeeping fields are
 * (re)initialised from prev/ref, and the image is transitioned to
 * VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL on 'cmd'.
 */
static void clone_surface(
	vk::command_buffer& cmd,
	std::unique_ptr<vk::render_target>& sink, vk::render_target* ref,
	u32 address, barrier_descriptor_t& prev)
{
	if (sink == nullptr)
	{
		// No reusable storage was supplied; allocate an image mirroring the reference surface
		const auto scaled_width = rsx::apply_resolution_scale(prev.width, true, ref->get_surface_width());
		const auto scaled_height = rsx::apply_resolution_scale(prev.height, true, ref->get_surface_height());

		auto& device = cmd.get_command_pool().get_owner();
		sink.reset(new vk::render_target(
			device, device.get_memory_mapping().device_local,
			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			VK_IMAGE_TYPE_2D,
			ref->format(),
			scaled_width, scaled_height, 1, 1, 1,
			VK_SAMPLE_COUNT_1_BIT,
			VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_TILING_OPTIMAL,
			ref->info.usage,
			ref->info.flags));
	}

	// The descriptor must reference the sink before it is handed to set_old_contents_region below
	vk::render_target* const target = sink.get();
	prev.target = target;

	// Layout properties: dimensions come from the clipped region, the rest from the reference
	target->native_component_map = ref->native_component_map;
	target->rsx_pitch = ref->get_rsx_pitch();
	target->native_pitch = static_cast<u16>(prev.width * ref->get_bpp());
	target->surface_width = prev.width;
	target->surface_height = prev.height;

	// Tagging
	target->queue_tag(address);
	target->sync_tag();

	// Inherited contents and state flags
	target->set_old_contents_region(prev, false);
	target->dirty = true;
	target->last_use_tag = ref->last_use_tag;

	change_image_layout(cmd, target, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
/**
 * Returns true when 'surface' has the same format as 'ref'
 * and its surface dimensions equal width x height.
 * The sample count argument is ignored.
 */
static bool is_compatible_surface(const vk::render_target* surface, const vk::render_target* ref, u16 width, u16 height, u8 /*sample_count*/)
{
	if (surface->format() != ref->format())
	{
		return false;
	}

	if (surface->get_surface_width() != width)
	{
		return false;
	}

	return surface->get_surface_height() == height;
}
static void get_surface_info(vk::render_target *surface, rsx::surface_format_info *info)
{
info->rsx_pitch = surface->rsx_pitch;
@ -253,36 +301,36 @@ namespace rsx
info->bpp = surface->get_bpp();
}
static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)
static void prepare_rtt_for_drawing(vk::command_buffer& cmd, vk::render_target *surface)
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range);
change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range);
//Reset deref count
surface->deref_count = 0;
surface->frame_tag = 0;
}
static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface)
static void prepare_rtt_for_sampling(vk::command_buffer& cmd, vk::render_target *surface)
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
}
static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)
static void prepare_ds_for_drawing(vk::command_buffer& cmd, vk::render_target *surface)
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
//Reset deref count
surface->deref_count = 0;
surface->frame_tag = 0;
}
static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface)
static void prepare_ds_for_sampling(vk::command_buffer& cmd, vk::render_target *surface)
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
}
static bool surface_is_pitch_compatible(const std::unique_ptr<vk::render_target> &surface, size_t pitch)
@ -290,10 +338,9 @@ namespace rsx
return surface->rsx_pitch == pitch;
}
static void invalidate_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface, u32 address, size_t pitch)
static void invalidate_surface_contents(vk::command_buffer& /*cmd*/, vk::render_target *surface, u32 address, size_t pitch)
{
surface->rsx_pitch = (u16)pitch;
surface->set_old_contents(old_surface);
surface->reset_aa_mode();
surface->queue_tag(address);
surface->dirty = true;

View File

@ -470,32 +470,35 @@ namespace rsx
return std::make_tuple(x, y, width, height);
}
/**
* Extracts from 'parent' a region that fits in 'child'
*/
static inline std::tuple<position2u, position2u, size2u> intersect_region(
u32 dst_address, u16 dst_w, u16 dst_h, u16 dst_bpp,
u32 src_address, u16 src_w, u16 src_h, u32 src_bpp,
u32 parent_address, u16 parent_w, u16 parent_h, u16 parent_bpp,
u32 child_address, u16 child_w, u16 child_h, u32 child_bpp,
u32 pitch)
{
if (src_address < dst_address)
if (child_address < parent_address)
{
const auto offset = dst_address - src_address;
const auto src_y = (offset / pitch);
const auto src_x = (offset % pitch) / src_bpp;
const auto dst_x = 0u;
const auto dst_y = 0u;
const auto w = std::min<u32>(dst_w, src_w - src_x);
const auto h = std::min<u32>(dst_h, src_h - src_y);
const auto offset = parent_address - child_address;
const auto src_x = 0u;
const auto src_y = 0u;
const auto dst_y = (offset / pitch);
const auto dst_x = (offset % pitch) / child_bpp;
const auto w = std::min<u32>(parent_w, child_w - dst_x);
const auto h = std::min<u32>(parent_h, child_h - dst_y);
return std::make_tuple<position2u, position2u, size2u>({ src_x, src_y }, { dst_x, dst_y }, { w, h });
}
else
{
const auto offset = src_address - dst_address;
const auto src_x = 0u;
const auto src_y = 0u;
const auto dst_y = (offset / pitch);
const auto dst_x = (offset % pitch) / dst_bpp;
const auto w = std::min<u32>(src_w, dst_w - dst_x);
const auto h = std::min<u32>(src_h, dst_h - dst_y);
const auto offset = child_address - parent_address;
const auto src_y = (offset / pitch);
const auto src_x = (offset % pitch) / parent_bpp;
const auto dst_x = 0u;
const auto dst_y = 0u;
const auto w = std::min<u32>(child_w, parent_w - src_x);
const auto h = std::min<u32>(child_h, parent_h - src_y);
return std::make_tuple<position2u, position2u, size2u>({ src_x, src_y }, { dst_x, dst_y }, { w, h });
}
@ -511,10 +514,14 @@ namespace rsx
return g_cfg.video.strict_rendering_mode ? 100 : g_cfg.video.resolution_scale_percent;
}
static inline const u16 apply_resolution_scale(u16 value, bool clamp)
static inline const u16 apply_resolution_scale(u16 value, bool clamp, u16 ref = 0)
{
if (value <= g_cfg.video.min_scalable_dimension)
if (ref == 0)
ref = value;
if (ref <= g_cfg.video.min_scalable_dimension)
return value;
else if (clamp)
return (u16)std::max((get_resolution_scale_percent() * value) / 100, 1);
else
@ -541,14 +548,14 @@ namespace rsx
* Returns <src_w, src_h, dst_w, dst_h>
*/
template <typename SurfaceType>
std::tuple<u16, u16, u16, u16> get_transferable_region(SurfaceType* surface)
std::tuple<u16, u16, u16, u16> get_transferable_region(const SurfaceType* surface)
{
const u16 src_w = surface->old_contents->width();
const u16 src_h = surface->old_contents->height();
const u16 src_w = surface->old_contents.source->width();
const u16 src_h = surface->old_contents.source->height();
u16 dst_w = src_w;
u16 dst_h = src_h;
switch (static_cast<SurfaceType*>(surface->old_contents)->read_aa_mode)
switch (static_cast<const SurfaceType*>(surface->old_contents.source)->read_aa_mode)
{
case rsx::surface_antialiasing::center_1_sample:
break;
@ -584,7 +591,7 @@ namespace rsx
}
template <typename SurfaceType>
inline bool pitch_compatible(SurfaceType* a, SurfaceType* b)
inline bool pitch_compatible(const SurfaceType* a, const SurfaceType* b)
{
if (a->get_surface_height() == 1 || b->get_surface_height() == 1)
return true;
@ -593,7 +600,7 @@ namespace rsx
}
template <bool __is_surface = true, typename SurfaceType>
inline bool pitch_compatible(SurfaceType* surface, u16 pitch_required, u16 height_required)
inline bool pitch_compatible(const SurfaceType* surface, u16 pitch_required, u16 height_required)
{
if constexpr (__is_surface)
{