rsx: Texture cache fixes and improvements

gl/vk/rsx: Refactoring; unify texture cache code
gl: Fixups
- Removes rsx::gl::texture class and leaves gl::texture intact
- Simplify texture create and upload mechanisms
- Re-enable texture uploads with the new texture cache mechanism
rsx: texture cache - check if blit region fits into dst texture before attempting to copy (sketched below)
gl/vk: Cleanup
- Set initial texture layout to DST_OPTIMAL since the texture holds no data at creation anyway
- Move structs outside of classes to avoid clutter
kd-11 2017-09-08 17:52:13 +03:00
parent 07c83f6e44
commit e37a2a8f7d
26 changed files with 2438 additions and 3083 deletions
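
The blit bounds check called out above boils down to rejecting any copy whose target rectangle spills past the destination texture. A minimal sketch (hypothetical helper; the names mirror the areai type and the validation used later in upload_scaled_image):

//Sketch only: reject a blit whose destination rectangle exceeds the target's dimensions
bool blit_region_fits(const areai &dst_area, u16 dst_width, u16 dst_height)
{
    return (unsigned)dst_area.x2 <= dst_width && (unsigned)dst_area.y2 <= dst_height;
}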

View File

@ -393,7 +393,7 @@ u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
}
}
static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment)
size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment)
{
size_t w = width;
size_t h = std::max<u16>(height, 1);

View File

@ -18,6 +18,7 @@ struct rsx_subresource_layout
* Get size to store texture in a linear fashion.
* Storage is assumed to use a rowPitchAlignement boundary for every row of the texture.
*/
size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment);
size_t get_placed_texture_storage_size(const rsx::fragment_texture &texture, size_t row_pitch_alignement, size_t mipmap_alignment = 0x200);
size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_t row_pitch_alignement, size_t mipmap_alignment = 0x200);
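
As a rough illustration of the linear layout the comment above describes, every row is padded to the row-pitch boundary and each mip level halves the dimensions. A sketch under those assumptions (bpp and the use of the align() helper are stand-ins; this is not the function's actual body):

size_t placed_storage_size_sketch(u16 width, u16 height, u16 mipmap, u8 bpp, size_t row_pitch_alignement, size_t mipmap_alignment)
{
    size_t size = 0;
    size_t w = width, h = std::max<u16>(height, 1);
    for (u16 level = 0; level < mipmap; ++level)
    {
        const size_t pitch = align(w * bpp, row_pitch_alignement); //pad each row
        size += align(pitch * h, mipmap_alignment);                //pad each level
        w = std::max<size_t>(w / 2, 1);
        h = std::max<size_t>(h / 2, 1);
    }
    return size;
}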

View File

@ -43,6 +43,16 @@ namespace rsx
u8 bpp;
};
template <typename image_storage_type>
struct render_target_descriptor
{
virtual image_storage_type get_surface() const = 0;
virtual u16 get_surface_width() const = 0;
virtual u16 get_surface_height() const = 0;
virtual u16 get_rsx_pitch() const = 0;
virtual u16 get_native_pitch() const = 0;
};
/**
* Helper for surface (i.e. color and depth stencil render target) management.
* It handles surface creation and storage. Backend should only retrieve pointer to surface.
@ -614,7 +624,7 @@ namespace rsx
if (region_fits(info.surface_width, info.surface_height, x_offset, y_offset, real_width, requested_height))
{
w = info.surface_width;
w = real_width;
h = info.surface_height;
clipped = false;
@ -627,8 +637,8 @@ namespace rsx
u16 remaining_width = info.surface_width - x_offset;
u16 remaining_height = info.surface_height - y_offset;
w = remaining_width;
h = remaining_height;
w = std::min(real_width, remaining_width);
h = std::min(requested_height, remaining_height);
clipped = true;
return true;
@ -637,7 +647,7 @@ namespace rsx
if (info.surface_width >= requested_width && info.surface_height >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
w = requested_width;
w = real_width;
h = requested_height;
clipped = true;

View File

@ -0,0 +1,972 @@
#pragma once
#include "../rsx_cache.h"
#include "../rsx_utils.h"
#include <atomic>
namespace rsx
{
enum texture_create_flags
{
default_component_order = 0,
native_component_order = 1,
swapped_native_component_order = 2,
};
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
class texture_cache
{
private:
std::pair<std::array<u8, 4>, std::array<u8, 4>> default_remap_vector =
{
{ CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B },
{ CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP }
};
protected:
struct ranged_storage
{
std::vector<section_storage_type> data; //Stored data
std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks
u32 max_range = 0; //Largest stored block
void notify(u32 data_size)
{
max_range = std::max(data_size, max_range);
valid_count++;
}
void add(section_storage_type& section, u32 data_size)
{
max_range = std::max(data_size, max_range);
valid_count++;
data.push_back(std::move(section));
}
};
// Keep track of cache misses to pre-emptively flush some addresses
struct framebuffer_memory_characteristics
{
u32 misses;
u32 block_size;
texture_format format;
};
std::atomic_bool in_access_violation_handler = { false };
shared_mutex m_cache_mutex;
std::unordered_map<u32, ranged_storage> m_cache;
std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);
std::unordered_map<u32, framebuffer_memory_characteristics> m_cache_miss_statistics_table;
//Memory usage
const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated
s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory
/* Helpers */
virtual void free_texture_section(section_storage_type&) = 0;
virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_resource_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0;
virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0;
virtual section_storage_type* create_new_texture(commandbuffer_type&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format,
const rsx::texture_dimension_extended type, const texture_create_flags flags, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
virtual void enforce_surface_creation_type(section_storage_type& section, const texture_create_flags expected) = 0;
virtual void insert_texture_barrier() = 0;
public:
texture_cache() {}
~texture_cache() {}
virtual void destroy() = 0;
virtual bool is_depth_texture(const u32) = 0;
virtual void on_frame_end() = 0;
section_storage_type *find_texture_from_range(u32 rsx_address, u32 range)
{
auto test = std::make_pair(rsx_address, range);
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (tex.get_section_base() > rsx_address)
continue;
if (!tex.is_dirty() && tex.overlaps(test, true))
return &tex;
}
}
return nullptr;
}
section_storage_type *find_texture_from_dimensions(u32 rsx_address, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
auto found = m_cache.find(rsx_address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{
if (tex.matches(rsx_address, width, height, mipmaps) && !tex.is_dirty())
{
return &tex;
}
}
}
return nullptr;
}
section_storage_type& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
{
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(rsx_address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
{
if (!confirm_dimensions) return tex;
if (tex.matches(rsx_address, width, height, mipmaps))
return tex;
else
{
LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
}
}
}
for (auto &tex : range_data.data)
{
if (tex.is_dirty())
{
if (tex.exists())
{
m_unreleased_texture_objects--;
free_texture_section(tex);
}
range_data.notify(rsx_size);
return tex;
}
}
}
}
writer_lock lock(m_cache_mutex);
section_storage_type tmp;
m_cache[rsx_address].add(tmp, rsx_size);
return m_cache[rsx_address].data.back();
}
section_storage_type* find_flushable_section(const u32 address, const u32 range)
{
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{
if (tex.is_dirty()) continue;
if (!tex.is_flushable() && !tex.is_flushed()) continue;
if (tex.matches(address, range))
return &tex;
}
}
return nullptr;
}
template <typename ...Args>
void lock_memory_region(image_storage_type* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height, const u32 pitch, Args&&... extras)
{
section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
writer_lock lock(m_cache_mutex);
if (!region.is_locked())
{
region.reset(memory_address, memory_size);
region.set_dirty(false);
no_access_range = region.get_min_max(no_access_range);
}
region.protect(utils::protection::no);
region.create(width, height, 1, 1, nullptr, image, pitch, false, std::forward<Args>(extras)...);
}
template <typename ...Args>
bool flush_memory_to_cache(const u32 memory_address, const u32 memory_size, bool skip_synchronized, Args&&... extra)
{
section_storage_type* region = find_flushable_section(memory_address, memory_size);
//TODO: Make this an assertion
if (region == nullptr)
{
LOG_ERROR(RSX, "Failed to find section for render target 0x%X + 0x%X", memory_address, memory_size);
return false;
}
if (skip_synchronized && region->is_synchronized())
return false;
region->copy_texture(false, std::forward<Args>(extra)...);
return true;
}
template <typename ...Args>
bool load_memory_from_cache(const u32 memory_address, const u32 memory_size, Args&&... extras)
{
section_storage_type *region = find_flushable_section(memory_address, memory_size);
if (region && !region->is_dirty())
{
region->fill_texture(std::forward<Args>(extras)...);
return true;
}
//No valid object found in cache
return false;
}
std::tuple<bool, section_storage_type*> address_is_flushable(u32 address)
{
if (address < no_access_range.first ||
address > no_access_range.second)
return std::make_tuple(false, nullptr);
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
if (tex.overlaps(address))
return std::make_tuple(true, &tex);
}
}
for (auto &address_range : m_cache)
{
if (address_range.first == address)
continue;
auto &range_data = address_range.second;
//Quickly discard range
const u32 lock_base = address_range.first & ~0xfff;
const u32 lock_limit = align(range_data.max_range + address_range.first, 4096);
if (address < lock_base || address >= lock_limit)
continue;
for (auto &tex : range_data.data)
{
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
if (tex.overlaps(address))
return std::make_tuple(true, &tex);
}
}
return std::make_tuple(false, nullptr);
}
template <typename ...Args>
bool flush_address(u32 address, Args&&... extras)
{
if (address < no_access_range.first ||
address > no_access_range.second)
return false;
bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::unordered_map<u32, bool> processed_ranges;
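//conditional_lock presumably skips re-acquiring m_cache_mutex when the in_access_violation_handler flag shows the fault handler already holds it, preventing self-deadlock on reentry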
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
(lock_base > address || lock_limit <= address))
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
//TODO: Map basic host_visible memory without coherent constraint
if (!tex.flush(std::forward<Args>(extras)...))
{
//Missed address, note this
//TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(tex);
}
response = true;
}
}
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response;
}
bool invalidate_address(u32 address)
{
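//Invalidate from the faulting address to the end of its 4K page; (address & 4095) is the offset into the page, so 4096 minus that is the remaining byte count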
return invalidate_range(address, 4096 - (address & 4095));
}
bool invalidate_range(u32 address, u32 range, bool unprotect = true)
{
std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);
if (trampled_range.second < read_only_range.first ||
trampled_range.first > read_only_range.second)
{
//Doesn't fall within the read_only textures range; check render targets
if (trampled_range.second < no_access_range.first ||
trampled_range.first > no_access_range.second)
return false;
}
bool response = false;
std::unordered_map<u32, bool> processed_ranges;
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
if (unprotect)
{
tex.set_dirty(true);
tex.unprotect();
}
else
{
tex.discard();
}
m_unreleased_texture_objects++;
range_data.valid_count--;
response = true;
}
}
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response;
}
void record_cache_miss(section_storage_type &tex)
{
const u32 memory_address = tex.get_section_base();
const u32 memory_size = tex.get_section_size();
const auto fmt = tex.get_format();
auto It = m_cache_miss_statistics_table.find(memory_address);
if (It == m_cache_miss_statistics_table.end())
{
m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt };
return;
}
auto &value = It->second;
if (value.format != fmt || value.block_size != memory_size)
{
m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt };
return;
}
value.misses++;
}
template <typename ...Args>
void flush_if_cache_miss_likely(const texture_format fmt, const u32 memory_address, const u32 memory_size, Args&&... extras)
{
auto It = m_cache_miss_statistics_table.find(memory_address);
if (It == m_cache_miss_statistics_table.end())
{
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return;
}
auto &value = It->second;
if (value.format != fmt || value.block_size != memory_size)
{
//Reset since the data has changed
//TODO: Keep track of all this information together
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return;
}
//Properly synchronized - no miss
if (!value.misses) return;
//Auto flush if this address keeps missing (not properly synchronized)
if (value.misses > 16)
{
//TODO: Determine better way of setting threshold
if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward<Args>(extras)...))
value.misses--;
}
}
void purge_dirty()
{
//Reclaims all graphics memory consumed by dirty textures
std::vector<u32> empty_addresses;
empty_addresses.reserve(32);
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
if (range_data.valid_count == 0)
empty_addresses.push_back(address_range.first);
for (auto &tex : range_data.data)
{
if (!tex.is_dirty())
continue;
free_texture_section(tex);
}
}
//Free descriptor objects as well
for (const auto &address : empty_addresses)
{
m_cache.erase(address);
}
m_unreleased_texture_objects = 0;
}
template <typename RsxTextureType, typename surface_store_type>
image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts)
{
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 range = (u32)get_texture_size(tex);
const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
const u32 tex_width = tex.width();
const u32 tex_height = tex.height();
const u32 native_pitch = (tex_width * get_format_block_size_in_bytes(format));
const u32 tex_pitch = (tex.pitch() == 0) ? native_pitch : tex.pitch();
if (!texaddr || !range)
{
LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, range);
return 0;
}
//Check for sampleable rtts from previous render passes
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
for (const auto& tex : m_rtts.m_bound_render_targets)
{
if (std::get<0>(tex) == texaddr)
{
if (g_cfg.video.strict_rendering_mode)
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
}
else
{
//issue a texture barrier to ensure previous writes are visible
insert_texture_barrier();
break;
}
}
}
return texptr->get_view();
}
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{
if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil))
{
if (g_cfg.video.strict_rendering_mode)
{
LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr);
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
}
else
{
//issue a texture barrier to ensure previous writes are visible
insert_texture_barrier();
}
}
return texptr->get_view();
}
/* Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise
* (Turbo: Super Stunt Squad does this; bypassing the need for a sync object)
* The engine does not read back the texture resource through cell, but specifies a texture location that is
* a bound render target. We can bypass the expensive download in this case
*/
const f32 internal_scale = (f32)tex_pitch / native_pitch;
const u32 internal_width = (const u32)(tex_width * internal_scale);
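//A pitch wider than the native pitch means the sampled region actually spans a wider surface; widen the lookup so the surface store can match the real stored width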
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true);
if (rsc.surface)
{
//TODO: Check that this region is not cpu-dirty before doing a copy
if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d)
{
LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d",
texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height());
}
else
{
image_view_type bound_surface = 0;
if (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT23 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT45)
{
LOG_WARNING(RSX, "Performing an RTT blit but request is for a compressed texture");
}
if (!rsc.is_bound || !g_cfg.video.strict_rendering_mode)
{
if (rsc.w == tex_width && rsc.h == tex_height)
{
if (rsc.is_bound)
{
LOG_WARNING(RSX, "Sampling from a currently bound render target @ 0x%x", texaddr);
insert_texture_barrier();
}
return rsc.surface->get_view();
}
else
bound_surface = create_temporary_subresource_view(cmd, rsc.surface, format, rsc.x, rsc.y, rsc.w, rsc.h);
}
else
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
bound_surface = create_temporary_subresource_view(cmd, rsc.surface, format, rsc.x, rsc.y, rsc.w, rsc.h);
}
if (bound_surface)
return bound_surface;
}
}
//If all the above failed, then it's probably a generic texture.
//Search in cache and upload/bind
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height);
if (cached_texture)
{
return cached_texture->get_raw_view();
}
//Do direct upload from CPU as the last resort
const auto extended_dimension = tex.get_extended_texture_dimension();
u16 height = 0;
u16 depth = 0;
switch (extended_dimension)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
height = 1;
depth = 1;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
height = tex_height;
depth = 1;
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
height = tex_height;
depth = 1;
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
height = tex_height;
depth = tex.depth();
break;
}
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
auto subresources_layout = get_subresources_layout(tex);
auto remap_vector = tex.decoded_remap();
return upload_image_from_cpu(cmd, texaddr, tex_width, height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
}
template <typename surface_store_type, typename blitter_type, typename ...Args>
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
{
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag
//TODO: Verify correct behavior
bool is_depth_blit = false;
bool src_is_render_target = false;
bool dst_is_render_target = false;
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8);
image_resource_type vram_texture = 0;
image_resource_type dest_texture = 0;
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
//Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst.rsx_address, dst.width, dst.clip_height, dst.pitch, true, true, false);
dst_is_render_target = dst_subres.surface != nullptr;
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src.rsx_address, src.width, src.height, src.pitch, true, true, false);
src_is_render_target = src_subres.surface != nullptr;
//Always use GPU blit if src or dst is in the surface store
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
return false;
u16 max_dst_width = dst.width;
u16 max_dst_height = dst.height;
float scale_x = dst.scale_x;
float scale_y = dst.scale_y;
size2i clip_dimensions = { dst.clip_width, dst.clip_height };
//Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch
size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.clip_height };
//Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
//Reproject final clip onto source...
const u16 src_w = (const u16)((f32)clip_dimensions.width / dst.scale_x);
const u16 src_h = (const u16)((f32)clip_dimensions.height / dst.scale_y);
areai src_area = { 0, 0, src_w, src_h };
areai dst_area = { 0, 0, dst.clip_width, dst.clip_height };
//Check if trivial memcpy can perform the same task
//Used to copy programs to the GPU in some cases
bool is_memcpy = false;
u32 memcpy_bytes_length = 0;
if (dst_is_argb8 == src_is_argb8 && !dst.swizzled)
{
if ((src.slice_h == 1 && dst.clip_height == 1) ||
(dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch))
{
const u8 bpp = dst_is_argb8 ? 4 : 2;
is_memcpy = true;
memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
}
}
section_storage_type* cached_dest = nullptr;
if (!dst_is_render_target)
{
//First check if this surface exists in VRAM with exact dimensions
//Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible
cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height);
//Check for any available region that will fit this one
if (!cached_dest) cached_dest = find_texture_from_range(dst_address, dst.pitch * dst.clip_height);
if (cached_dest)
{
//Prep surface
enforce_surface_creation_type(*cached_dest, dst.swizzled ? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order);
//TODO: Move this code into utils since it is used a lot
if (const u32 address_offset = dst.rsx_address - cached_dest->get_section_base())
{
const u16 bpp = dst_is_argb8 ? 4 : 2;
const u16 offset_y = address_offset / dst.pitch;
const u16 offset_x = address_offset % dst.pitch;
const u16 offset_x_in_block = offset_x / bpp;
dst_area.x1 += offset_x_in_block;
dst_area.x2 += offset_x_in_block;
dst_area.y1 += offset_y;
dst_area.y2 += offset_y;
}
//Validate clipping region
if ((unsigned)dst_area.x2 <= cached_dest->get_width() &&
(unsigned)dst_area.y2 <= cached_dest->get_height())
{
dest_texture = cached_dest->get_raw_texture();
max_dst_width = cached_dest->get_width();
max_dst_height = cached_dest->get_height();
}
else
cached_dest = nullptr;
}
if (!cached_dest && is_memcpy)
{
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
}
else
{
dst_area.x1 += dst_subres.x;
dst_area.x2 += dst_subres.x;
dst_area.y1 += dst_subres.y;
dst_area.y2 += dst_subres.y;
dest_texture = dst_subres.surface->get_surface();
max_dst_width = dst_subres.surface->get_surface_width();
max_dst_height = dst_subres.surface->get_surface_height();
if (is_memcpy)
{
//Some render target descriptions are actually invalid
//Confirm this is a flushable RTT
const auto rsx_pitch = dst_subres.surface->get_rsx_pitch();
const auto native_pitch = dst_subres.surface->get_native_pitch();
if (rsx_pitch <= 64 && native_pitch != rsx_pitch)
{
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
}
}
//Create source texture if does not exist
if (!src_is_render_target)
{
auto preloaded_texture = find_texture_from_dimensions(src_address, src.width, src.slice_h);
if (preloaded_texture != nullptr)
{
vram_texture = preloaded_texture->get_raw_texture();
}
else
{
flush_address(src.rsx_address, std::forward<Args>(extras)...);
const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
std::vector<rsx_subresource_layout> subresource_layout;
rsx_subresource_layout subres = {};
subres.width_in_block = src.width;
subres.height_in_block = src.slice_h;
subres.pitch_in_bytes = pitch_in_block;
subres.depth = 1;
subres.data = { (const gsl::byte*)src.pixels, src.pitch * src.slice_h };
subresource_layout.push_back(subres);
const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format,
subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture();
}
}
else
{
if (src_subres.w != clip_dimensions.width ||
src_subres.h != clip_dimensions.height)
{
f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->get_native_pitch();
const int dst_width = (int)(src_subres.w * dst.scale_x * subres_scaling_x);
const int dst_height = (int)(src_subres.h * dst.scale_y);
dst_area.x2 = dst_area.x1 + dst_width;
dst_area.y2 = dst_area.y1 + dst_height;
}
src_area.x2 = src_subres.w;
src_area.y2 = src_subres.h;
src_area.x1 += src_subres.x;
src_area.x2 += src_subres.x;
src_area.y1 += src_subres.y;
src_area.y2 += src_subres.y;
if (src.compressed_y)
{
dst_area.y1 *= 2;
dst_area.y2 *= 2;
dst_dimensions.height *= 2;
}
vram_texture = src_subres.surface->get_surface();
}
bool format_mismatch = false;
if (src_subres.is_depth_surface)
{
if (dest_texture)
{
if (dst_is_render_target && !dst_subres.is_depth_surface)
{
LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported");
return true;
}
if (!cached_dest->has_compatible_format(src_subres.surface))
format_mismatch = true;
}
is_depth_blit = true;
}
//TODO: Check for other types of format mismatch
if (format_mismatch)
{
invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size());
dest_texture = 0;
cached_dest = nullptr;
}
//Validate clipping region
if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0;
if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0;
//Reproject clip offsets onto source to simplify blit
if (dst.clip_x || dst.clip_y)
{
const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / dst.scale_x);
const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / dst.scale_y);
src_area.x1 += scaled_clip_offset_x;
src_area.x2 += scaled_clip_offset_x;
src_area.y1 += scaled_clip_offset_y;
src_area.y2 += scaled_clip_offset_y;
}
if (dest_texture == 0)
{
u32 gcm_format;
if (is_depth_blit)
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_DEPTH24_D8 : CELL_GCM_TEXTURE_DEPTH16;
else
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst.clip_height,
dst_dimensions.width, dst_dimensions.height, 1, 1,
gcm_format, rsx::texture_dimension_extended::texture_dimension_2d,
dst.swizzled? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order,
default_remap_vector)->get_raw_texture();
}
blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit);
return true;
}
};
}
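
From the renderer's side the cache is driven through a handful of entry points; roughly the following flow, assuming a concrete backend instance named cache (the actual call sites appear in the GL files below):

//Sketch of the typical call flow through the public interface above
//1. when render targets bind:  cache.lock_memory_region(surface, addr, size, w, h, pitch, ...);
//2. on an access violation:    writing ? cache.invalidate_address(addr) : cache.flush_address(addr, ...);
//3. when a shader samples:     auto view = cache.upload_texture(cmd, tex, m_rtts);
//4. at frame end:              cache.on_frame_end(); //and occasionally cache.purge_dirty()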

View File

@ -463,22 +463,12 @@ void GLGSRender::end()
{
int location;
if (!rsx::method_registers.fragment_textures[i].enabled())
{
if (m_textures_dirty[i])
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, 0);
m_textures_dirty[i] = false;
}
continue;
}
if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
{
m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
__glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts);
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
}
}
@ -489,16 +479,11 @@ void GLGSRender::end()
int location;
if (!rsx::method_registers.vertex_textures[i].enabled())
{
//glActiveTexture(GL_TEXTURE0 + texture_index);
//glBindTexture(GL_TEXTURE_2D, 0);
continue;
}
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
{
m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
m_gl_texture_cache.upload_and_bind_texture(texture_index, GL_TEXTURE_2D, rsx::method_registers.vertex_textures[i], m_rtts);
}
}
@ -766,7 +751,8 @@ void GLGSRender::on_init_thread()
glEnable(GL_CLIP_DISTANCE0 + 4);
glEnable(GL_CLIP_DISTANCE0 + 5);
m_gl_texture_cache.initialize(this);
m_gl_texture_cache.initialize();
m_thread_id = std::this_thread::get_id();
m_shaders_cache->load();
}
@ -831,7 +817,7 @@ void GLGSRender::on_exit()
}
m_text_printer.close();
m_gl_texture_cache.close();
m_gl_texture_cache.destroy();
for (u32 i = 0; i < occlusion_query_count; ++i)
{
@ -1107,15 +1093,10 @@ void GLGSRender::flip(int buffer)
//Check the texture cache for a blitted copy
const u32 size = buffer_pitch * buffer_height;
auto surface = m_gl_texture_cache.find_texture_from_range(absolute_address, size);
bool ignore_scaling = false;
if (surface != nullptr)
{
auto dims = surface->get_dimensions();
buffer_width = std::get<0>(dims);
buffer_height = std::get<1>(dims);
m_flip_fbo.color = surface->id();
m_flip_fbo.color = surface->get_raw_view();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
@ -1125,7 +1106,6 @@ void GLGSRender::flip(int buffer)
m_flip_fbo.color = *render_target_texture;
m_flip_fbo.read_buffer(m_flip_fbo.color);
ignore_scaling = true;
}
else
{
@ -1156,20 +1136,6 @@ void GLGSRender::flip(int buffer)
m_flip_fbo.color = m_flip_tex_color;
m_flip_fbo.read_buffer(m_flip_fbo.color);
ignore_scaling = true;
}
if (!ignore_scaling && buffer_region.tile && buffer_region.tile->comp != CELL_GCM_COMPMODE_DISABLED)
{
LOG_ERROR(RSX, "Output buffer compression mode = 0x%X", buffer_region.tile->comp);
switch (buffer_region.tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X2:
case CELL_GCM_COMPMODE_C32_2X1:
buffer_height = display_buffers[buffer].height / 2;
break;
}
}
// Blit source image to the screen
@ -1196,7 +1162,7 @@ void GLGSRender::flip(int buffer)
rsx::thread::flip(buffer);
// Cleanup
m_gl_texture_cache.clear_temporary_surfaces();
m_gl_texture_cache.on_frame_end();
for (auto &tex : m_rtts.invalidated_resources)
tex->remove();
@ -1229,9 +1195,31 @@ u64 GLGSRender::timestamp() const
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
if (is_writing)
return m_gl_texture_cache.mark_as_dirty(address);
return m_gl_texture_cache.invalidate_address(address);
else
return m_gl_texture_cache.flush_section(address);
{
if (std::this_thread::get_id() != m_thread_id)
{
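//Faulting thread is not the render thread; GL work must happen there, so queue a flush request and block until the render thread services it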
bool flushable;
gl::cached_texture_section* section_to_post;
std::tie(flushable, section_to_post) = m_gl_texture_cache.address_is_flushable(address);
if (!flushable) return false;
work_item &task = post_flush_request(address, section_to_post);
vm::temporary_unlock();
{
std::unique_lock<std::mutex> lock(task.guard_mutex);
task.cv.wait(lock, [&task] { return task.processed; });
}
task.received = true;
return task.result;
}
return m_gl_texture_cache.flush_address(address);
}
}
void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
@ -1273,7 +1261,7 @@ void GLGSRender::do_local_task()
}
}
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section)
work_item& GLGSRender::post_flush_request(u32 address, gl::cached_texture_section *section)
{
std::lock_guard<std::mutex> lock(queue_guard);
@ -1295,7 +1283,7 @@ void GLGSRender::synchronize_buffers()
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts);
return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts);
}
void GLGSRender::check_zcull_status(bool framebuffer_swap, bool force_read)

View File

@ -29,21 +29,321 @@ struct work_item
std::mutex guard_mutex;
u32 address_to_flush = 0;
gl::texture_cache::cached_texture_section *section_to_flush = nullptr;
gl::cached_texture_section *section_to_flush = nullptr;
volatile bool processed = false;
volatile bool result = false;
volatile bool received = false;
};
struct occlusion_query_info
{
GLuint handle;
GLint result;
GLint num_draws;
bool pending;
bool active;
};
struct zcull_statistics
{
u32 zpass_pixel_cnt;
u32 zcull_stats;
u32 zcull_stats1;
u32 zcull_stats2;
u32 zcull_stats3;
void clear()
{
zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0;
}
};
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
{
active_query = query;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
task_stack.push_back(query);
pending++;
}
};
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002;
const u32 DEPTH_RANGE_MIN = 0xFFFF0003;
const u32 DEPTH_RANGE_MAX = 0xFFFF0004;
std::unordered_map<GLenum, u32> properties = {};
std::unordered_map<GLenum, std::array<u32, 4>> indexed_properties = {};
bool enable(u32 test, GLenum cap)
{
auto found = properties.find(cap);
if (found != properties.end() && found->second == test)
return !!test;
properties[cap] = test;
if (test)
glEnable(cap);
else
glDisable(cap);
return !!test;
}
bool enablei(u32 test, GLenum cap, u32 index)
{
auto found = indexed_properties.find(cap);
const bool exists = found != indexed_properties.end();
if (!exists)
{
indexed_properties[cap] = {};
indexed_properties[cap][index] = test;
}
else
{
if (found->second[index] == test)
return !!test;
found->second[index] = test;
}
if (test)
glEnablei(cap, index);
else
glDisablei(cap, index);
return !!test;
}
const bool test_property(GLenum property, u32 test) const
{
auto found = properties.find(property);
if (found == properties.end())
return false;
return (found->second == test);
}
void depth_func(GLenum func)
{
if (!test_property(GL_DEPTH_FUNC, func))
{
glDepthFunc(func);
properties[GL_DEPTH_FUNC] = func;
}
}
void depth_mask(GLboolean mask)
{
if (!test_property(GL_DEPTH_WRITEMASK, mask))
{
glDepthMask(mask);
properties[GL_DEPTH_WRITEMASK] = mask;
}
}
void clear_depth(GLfloat depth)
{
u32 value = (u32&)depth;
if (!test_property(GL_DEPTH_CLEAR_VALUE, value))
{
glClearDepth(depth);
properties[GL_DEPTH_CLEAR_VALUE] = value;
}
}
void stencil_mask(GLuint mask)
{
if (!test_property(GL_STENCIL_WRITEMASK, mask))
{
glStencilMask(mask);
properties[GL_STENCIL_WRITEMASK] = mask;
}
}
void clear_stencil(GLint stencil)
{
u32 value = (u32&)stencil;
if (!test_property(GL_STENCIL_CLEAR_VALUE, value))
{
glClearStencil(stencil);
properties[GL_STENCIL_CLEAR_VALUE] = value;
}
}
void color_mask(u32 mask)
{
if (!test_property(GL_COLOR_WRITEMASK, mask))
{
glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 1 : 0));
properties[GL_COLOR_WRITEMASK] = mask;
}
}
void color_mask(bool r, bool g, bool b, bool a)
{
u32 mask = 0;
if (r) mask |= 0x20;
if (g) mask |= 0x40;
if (b) mask |= 0x80;
if (a) mask |= 0x10;
color_mask(mask);
}
void clear_color(u8 r, u8 g, u8 b, u8 a)
{
u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24;
if (!test_property(GL_COLOR_CLEAR_VALUE, value))
{
glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f);
properties[GL_COLOR_CLEAR_VALUE] = value;
}
}
void depth_bounds(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max))
{
glDepthBoundsEXT(min, max);
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
}
}
void depth_range(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max))
{
glDepthRange(min, max);
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;
}
}
void logic_op(GLenum op)
{
if (!test_property(GL_COLOR_LOGIC_OP, op))
{
glLogicOp(op);
properties[GL_COLOR_LOGIC_OP] = op;
}
}
void line_width(GLfloat width)
{
u32 value = (u32&)width;
if (!test_property(GL_LINE_WIDTH, value))
{
glLineWidth(width);
properties[GL_LINE_WIDTH] = value;
}
}
void front_face(GLenum face)
{
if (!test_property(GL_FRONT_FACE, face))
{
glFrontFace(face);
properties[GL_FRONT_FACE] = face;
}
}
void cull_face(GLenum mode)
{
if (!test_property(GL_CULL_FACE_MODE, mode))
{
glCullFace(mode);
properties[GL_CULL_FACE_MODE] = mode;
}
}
void polygon_offset(float factor, float units)
{
u32 _units = (u32&)units;
u32 _factor = (u32&)factor;
if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor))
{
glPolygonOffset(factor, units);
properties[GL_POLYGON_OFFSET_UNITS] = _units;
properties[GL_POLYGON_OFFSET_FACTOR] = _factor;
}
}
};
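
The struct above exists to drop redundant driver calls: every setter compares the requested value against the last one written and only reaches the GL driver on an actual change. For instance (hypothetical call site):

driver_state gl_state;
gl_state.depth_func(GL_LEQUAL); //first call: glDepthFunc is issued
gl_state.depth_func(GL_LEQUAL); //cached value matches: no GL call
gl_state.depth_func(GL_ALWAYS); //value changed: glDepthFunc issued again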
struct sw_ring_buffer
{
std::vector<u8> data;
u32 ring_pos = 0;
u32 ring_length = 0;
sw_ring_buffer(u32 size)
{
data.resize(size);
ring_length = size;
}
void* get(u32 dwords)
{
const u32 required = (dwords << 2);
if ((ring_pos + required) > ring_length)
{
ring_pos = 0;
return data.data();
}
void *result = data.data() + ring_pos;
ring_pos += required;
return result;
}
};
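
Note the wrap behavior in get(): a request that no longer fits simply restarts at offset zero with no fencing, so callers must tolerate old contents being overwritten once the buffer wraps. Usage is just (sizes are in dwords):

sw_ring_buffer ring(1 * 1024 * 1024);    //1 MiB of scratch space
void *dst = ring.get(256);               //reserve 256 dwords (1024 bytes)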
class GLGSRender : public GSRender
{
private:
GLFragmentProgram m_fragment_prog;
GLVertexProgram m_vertex_prog;
rsx::gl::texture m_gl_textures[rsx::limits::fragment_textures_count];
rsx::gl::texture m_gl_vertex_textures[rsx::limits::vertex_textures_count];
gl::sampler_state m_gl_sampler_states[rsx::limits::fragment_textures_count];
gl::glsl::program *m_program;
@ -86,14 +386,12 @@ private:
rsx::gcm_framebuffer_info depth_surface_info;
bool flush_draw_buffers = false;
std::thread::id m_thread_id;
public:
gl::fbo draw_fbo;
private:
GLProgramBuffer m_prog_buffer;
//buffer
gl::fbo draw_fbo;
gl::fbo m_flip_fbo;
gl::texture m_flip_tex_color;
@ -102,64 +400,8 @@ private:
//occlusion query
bool zcull_surface_active = false;
struct occlusion_query_info
{
GLuint handle;
GLint result;
GLint num_draws;
bool pending;
bool active;
};
struct
{
u32 zpass_pixel_cnt;
u32 zcull_stats;
u32 zcull_stats1;
u32 zcull_stats2;
u32 zcull_stats3;
void clear()
{
zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0;
}
}
current_zcull_stats;
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
{
active_query = query;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
task_stack.push_back(query);
pending++;
}
}
zcull_task_queue = {};
zcull_statistics current_zcull_stats;
occlusion_task zcull_task_queue = {};
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
@ -169,225 +411,7 @@ public:
private:
struct
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002;
const u32 DEPTH_RANGE_MIN = 0xFFFF0003;
const u32 DEPTH_RANGE_MAX = 0xFFFF0004;
std::unordered_map<GLenum, u32> properties = {};
std::unordered_map<GLenum, std::array<u32, 4>> indexed_properties = {};
bool enable(u32 test, GLenum cap)
{
auto found = properties.find(cap);
if (found != properties.end() && found->second == test)
return !!test;
properties[cap] = test;
if (test)
glEnable(cap);
else
glDisable(cap);
return !!test;
}
bool enablei(u32 test, GLenum cap, u32 index)
{
auto found = indexed_properties.find(cap);
const bool exists = found != indexed_properties.end();
if (!exists)
{
indexed_properties[cap] = {};
indexed_properties[cap][index] = test;
}
else
{
if (found->second[index] == test)
return !!test;
found->second[index] = test;
}
if (test)
glEnablei(cap, index);
else
glDisablei(cap, index);
return !!test;
}
const bool test_property(GLenum property, u32 test) const
{
auto found = properties.find(property);
if (found == properties.end())
return false;
return (found->second == test);
}
void depth_func(GLenum func)
{
if (!test_property(GL_DEPTH_FUNC, func))
{
glDepthFunc(func);
properties[GL_DEPTH_FUNC] = func;
}
}
void depth_mask(GLboolean mask)
{
if (!test_property(GL_DEPTH_WRITEMASK, mask))
{
glDepthMask(mask);
properties[GL_DEPTH_WRITEMASK] = mask;
}
}
void clear_depth(GLfloat depth)
{
u32 value = (u32&)depth;
if (!test_property(GL_DEPTH_CLEAR_VALUE, value))
{
glClearDepth(depth);
properties[GL_DEPTH_CLEAR_VALUE] = value;
}
}
void stencil_mask(GLuint mask)
{
if (!test_property(GL_STENCIL_WRITEMASK, mask))
{
glStencilMask(mask);
properties[GL_STENCIL_WRITEMASK] = mask;
}
}
void clear_stencil(GLint stencil)
{
u32 value = (u32&)stencil;
if (!test_property(GL_STENCIL_CLEAR_VALUE, value))
{
glClearStencil(stencil);
properties[GL_STENCIL_CLEAR_VALUE] = value;
}
}
void color_mask(u32 mask)
{
if (!test_property(GL_COLOR_WRITEMASK, mask))
{
glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 1 : 0));
properties[GL_COLOR_WRITEMASK] = mask;
}
}
void color_mask(bool r, bool g, bool b, bool a)
{
u32 mask = 0;
if (r) mask |= 0x20;
if (g) mask |= 0x40;
if (b) mask |= 0x80;
if (a) mask |= 0x10;
color_mask(mask);
}
void clear_color(u8 r, u8 g, u8 b, u8 a)
{
u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24;
if (!test_property(GL_COLOR_CLEAR_VALUE, value))
{
glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f);
properties[GL_COLOR_CLEAR_VALUE] = value;
}
}
void depth_bounds(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max))
{
glDepthBoundsEXT(min, max);
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
}
}
void depth_range(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max))
{
glDepthRange(min, max);
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;
}
}
void logic_op(GLenum op)
{
if (!test_property(GL_COLOR_LOGIC_OP, op))
{
glLogicOp(op);
properties[GL_COLOR_LOGIC_OP] = op;
}
}
void line_width(GLfloat width)
{
u32 value = (u32&)width;
if (!test_property(GL_LINE_WIDTH, value))
{
glLineWidth(width);
properties[GL_LINE_WIDTH] = value;
}
}
void front_face(GLenum face)
{
if (!test_property(GL_FRONT_FACE, face))
{
glFrontFace(face);
properties[GL_FRONT_FACE] = face;
}
}
void cull_face(GLenum mode)
{
if (!test_property(GL_CULL_FACE_MODE, mode))
{
glCullFace(mode);
properties[GL_CULL_FACE_MODE] = mode;
}
}
void polygon_offset(float factor, float units)
{
u32 _units = (u32&)units;
u32 _factor = (u32&)factor;
if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor))
{
glPolygonOffset(factor, units);
properties[GL_POLYGON_OFFSET_UNITS] = _units;
properties[GL_POLYGON_OFFSET_FACTOR] = _factor;
}
}
}
gl_state;
driver_state gl_state;
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
@ -405,7 +429,7 @@ public:
void set_viewport();
void synchronize_buffers();
work_item& post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section);
work_item& post_flush_request(u32 address, gl::cached_texture_section *section);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;

View File

@ -1512,6 +1512,11 @@ namespace gl
return (texture::format)result;
}
virtual texture::internal_format get_compatible_internal_format() const
{
return (texture::internal_format)get_internal_format();
}
texture::channel_type get_channel_type(texture::channel_name channel) const
{
save_binding_state save(*this);

View File

@ -291,8 +291,8 @@ void GLGSRender::init_buffers(bool skip_reading)
if (!surface_info[i].address || !surface_info[i].pitch) continue;
const u32 range = surface_info[i].pitch * surface_info[i].height;
m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes, *std::get<1>(m_rtts.m_bound_render_targets[i]));
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
}
}
@ -311,8 +311,8 @@ void GLGSRender::init_buffers(bool skip_reading)
if (pitch != depth_surface_info.pitch)
LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);
m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, true, *std::get<1>(m_rtts.m_bound_depth_stencil));
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, true);
}
}
}
@ -344,8 +344,8 @@ void GLGSRender::read_buffers()
auto read_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers.surface_clip_width();
u32 height = rsx::method_registers.surface_clip_height();
const u32 width = rsx::method_registers.surface_clip_width();
const u32 height = rsx::method_registers.surface_clip_height();
const std::array<u32, 4> offsets = get_offsets();
const std::array<u32, 4 > locations = get_locations();
@ -353,17 +353,19 @@ void GLGSRender::read_buffers()
for (int i = index; i < index + count; ++i)
{
u32 offset = offsets[i];
u32 location = locations[i];
u32 pitch = pitchs[i];
const u32 offset = offsets[i];
const u32 location = locations[i];
const u32 pitch = pitchs[i];
if (!surface_info[i].pitch)
continue;
const u32 range = pitch * height;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
bool success = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);
bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i]));
//Fall back to slower methods if the image could not be fetched from cache.
if (!success)
@ -374,7 +376,6 @@ void GLGSRender::read_buffers()
}
else
{
u32 range = pitch * height;
m_gl_texture_cache.invalidate_range(texaddr, range);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
@ -416,13 +417,15 @@ void GLGSRender::read_buffers()
if (g_cfg.video.read_depth_buffer)
{
//TODO: use pitch
u32 pitch = depth_surface_info.pitch;
const u32 pitch = depth_surface_info.pitch;
const u32 width = rsx::method_registers.surface_clip_width();
const u32 height = rsx::method_registers.surface_clip_height();
if (!pitch)
return;
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
bool in_cache = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);
bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil));
if (in_cache)
return;
@ -433,7 +436,7 @@ void GLGSRender::read_buffers()
int pixel_size = rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt());
gl::buffer pbo_depth;
__glcheck pbo_depth.create(rsx::method_registers.surface_clip_width() * rsx::method_registers.surface_clip_height() * pixel_size);
__glcheck pbo_depth.create(width * height * pixel_size);
__glcheck pbo_depth.map([&](GLubyte* pixels)
{
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
@ -482,7 +485,7 @@ void GLGSRender::write_buffers()
*/
const u32 range = surface_info[i].pitch * surface_info[i].height;
__glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range);
__glcheck m_gl_texture_cache.flush_memory_to_cache(surface_info[i].address, range, true);
}
};
@ -497,6 +500,6 @@ void GLGSRender::write_buffers()
u32 range = depth_surface_info.width * depth_surface_info.height * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.save_rtt(depth_surface_info.address, range);
m_gl_texture_cache.flush_memory_to_cache(depth_surface_info.address, range, true);
}
}
}

View File

@ -48,7 +48,7 @@ namespace rsx
namespace gl
{
class render_target : public texture
class render_target : public texture, public rsx::render_target_descriptor<u32>
{
bool is_cleared = false;
@ -82,7 +82,7 @@ namespace gl
native_pitch = pitch;
}
u16 get_native_pitch() const
u16 get_native_pitch() const override
{
return native_pitch;
}
@ -93,17 +93,29 @@ namespace gl
rsx_pitch = pitch;
}
u16 get_rsx_pitch() const
u16 get_rsx_pitch() const override
{
return rsx_pitch;
}
std::pair<u16, u16> get_dimensions()
u16 get_surface_width() const override
{
if (!surface_height) surface_height = height();
if (!surface_width) surface_width = width();
return surface_width;
}
return std::make_pair(surface_width, surface_height);
u16 get_surface_height() const override
{
return surface_height;
}
u32 get_surface() const override
{
return id();
}
u32 get_view() const
{
return id();
}
void set_compatible_format(texture::internal_format format)
@ -111,10 +123,16 @@ namespace gl
compatible_internal_format = format;
}
texture::internal_format get_compatible_internal_format()
texture::internal_format get_compatible_internal_format() const override
{
return compatible_internal_format;
}
void update_surface()
{
surface_width = width();
surface_height = height();
}
};
}
@ -159,6 +177,7 @@ struct gl_render_target_traits
result->old_contents = old_surface;
result->set_cleared();
result->update_surface();
return result;
}
@ -198,18 +217,17 @@ struct gl_render_target_traits
if (old_surface != nullptr && old_surface->get_compatible_internal_format() == format.internal_format)
result->old_contents = old_surface;
result->update_surface();
return result;
}
static
void get_surface_info(gl::render_target *surface, rsx::surface_format_info *info)
{
const auto dims = surface->get_dimensions();
info->rsx_pitch = surface->get_rsx_pitch();
info->native_pitch = surface->get_native_pitch();
info->surface_width = std::get<0>(dims);
info->surface_height = std::get<1>(dims);
info->surface_width = surface->get_surface_width();
info->surface_height = surface->get_surface_height();
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
}

View File

@ -5,7 +5,6 @@
#include "../RSXThread.h"
#include "../RSXTexture.h"
#include "../rsx_utils.h"
#include "../Common/TextureUtils.h"
namespace gl
{
@ -162,20 +161,20 @@ namespace gl
}
glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, min_filter);
glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, 0.f);
glSamplerParameterf(samplerHandle, GL_TEXTURE_MIN_LOD, -1000.f);
glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_LOD, 1000.f);
glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, 0.f);
glSamplerParameterf(samplerHandle, GL_TEXTURE_MIN_LOD, -1000.f);
glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_LOD, 1000.f);
}
else
{
glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, tex_min_filter(tex.min_filter()));
glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, tex.bias());
glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8));
glSamplerParameteri(samplerHandle, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8));
glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, tex_min_filter(tex.min_filter()));
glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, tex.bias());
glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8));
glSamplerParameteri(samplerHandle, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8));
}
glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter()));
glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_ANISOTROPY_EXT, ::gl::max_aniso(tex.max_aniso()));
glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter()));
glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso()));
const u32 texture_format = tex.format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8)
@ -197,10 +196,7 @@ namespace gl
else
glSamplerParameteri(samplerHandle, GL_TEXTURE_COMPARE_MODE, GL_NONE);
}
}
namespace
{
bool is_compressed_format(u32 texture_format)
{
switch (texture_format)
@ -234,23 +230,6 @@ namespace
fmt::throw_exception("Unknown format 0x%x" HERE, texture_format);
}
bool requires_unpack_byte(u32 texture_format)
{
switch (texture_format)
{
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_Y16_X16:
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return true;
}
return false;
}
std::array<GLenum, 4> get_swizzle_remap(u32 texture_format)
{
// NOTE: This must be in ARGB order in all forms below.
@ -270,307 +249,253 @@ namespace
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return { GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE };
return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE };
case CELL_GCM_TEXTURE_A4R4G4B4:
return { GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA };
return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA };
case CELL_GCM_TEXTURE_B8:
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_X32_FLOAT:
return { GL_RED, GL_RED, GL_RED, GL_RED };
return{ GL_RED, GL_RED, GL_RED, GL_RED };
case CELL_GCM_TEXTURE_G8B8:
return { GL_GREEN, GL_RED, GL_GREEN, GL_RED};
case CELL_GCM_TEXTURE_G8B8:
return{ GL_GREEN, GL_RED, GL_GREEN, GL_RED };
case CELL_GCM_TEXTURE_Y16_X16:
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
return { GL_RED, GL_GREEN, GL_RED, GL_GREEN};
return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN };
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN };
return{ GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN };
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_D8R8G8B8:
return { GL_ONE, GL_RED, GL_GREEN, GL_BLUE };
case CELL_GCM_TEXTURE_D8R8G8B8:
return{ GL_ONE, GL_RED, GL_GREEN, GL_BLUE };
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
return { GL_RED, GL_GREEN, GL_RED, GL_GREEN };
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN };
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return { GL_ZERO, GL_GREEN, GL_BLUE, GL_RED };
return{ GL_ZERO, GL_GREEN, GL_BLUE, GL_RED };
}
fmt::throw_exception("Unknown format 0x%x" HERE, texture_format);
}
}
namespace rsx
{
namespace gl
GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type)
{
static const int gl_tex_zfunc[] =
{
GL_NEVER,
GL_LESS,
GL_EQUAL,
GL_LEQUAL,
GL_GREATER,
GL_NOTEQUAL,
GL_GEQUAL,
GL_ALWAYS,
};
void texture::create()
{
if (m_id)
{
remove();
}
glGenTextures(1, &m_id);
}
if (is_compressed_format(gcm_format))
{
//Compressed formats are block-based; dimensions are rounded up to the 4-texel block alignment
//TODO: Verify that samplers are not affected by the padding
width = align(width, 4);
height = align(height, 4);
}
u16 texture::get_pitch_modifier(u32 format)
GLuint id = 0;
GLenum target;
GLenum internal_format = get_sized_internal_format(gcm_format);
glGenTextures(1, &id);
switch (type)
{
switch (format)
{
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
default:
LOG_ERROR(RSX, "Unimplemented pitch modifier for texture format: 0x%x", format);
return 0;
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return 4;
case CELL_GCM_TEXTURE_B8:
return 1;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return 0;
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_G8B8:
case CELL_GCM_TEXTURE_R6G5B5:
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_D1R5G5B5:
return 2;
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_X32_FLOAT:
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
case CELL_GCM_TEXTURE_Y16_X16:
return 4;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return 8;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return 16;
}
case rsx::texture_dimension_extended::texture_dimension_1d:
target = GL_TEXTURE_1D;
glBindTexture(GL_TEXTURE_1D, id);
glTexStorage1D(GL_TEXTURE_1D, mipmaps, internal_format, width);
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
target = GL_TEXTURE_2D;
glBindTexture(GL_TEXTURE_2D, id);
glTexStorage2D(GL_TEXTURE_2D, mipmaps, internal_format, width, height);
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
target = GL_TEXTURE_3D;
glBindTexture(GL_TEXTURE_3D, id);
glTexStorage3D(GL_TEXTURE_3D, mipmaps, internal_format, width, height, depth);
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
target = GL_TEXTURE_CUBE_MAP;
glBindTexture(GL_TEXTURE_CUBE_MAP, id);
glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmaps, internal_format, width, height);
break;
}
namespace
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
return id;
}
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, std::vector<gsl::byte> staging_buffer)
{
int mip_level = 0;
if (is_compressed_format(format))
{
void create_and_fill_texture(rsx::texture_dimension_extended dim,
u16 mipmap_count, int format, u16 width, u16 height, u16 depth, const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled,
std::vector<gsl::byte> staging_buffer)
{
int mip_level = 0;
if (is_compressed_format(format))
{
//Compressed formats are block-based; dimensions are rounded up to the 4-texel block alignment
//TODO: Verify that samplers are not affected by the padding
width = align(width, 4);
height = align(height, 4);
}
if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
{
__glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width);
if (!is_compressed_format(format))
{
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
{
__glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
if (!is_compressed_format(format))
{
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
{
__glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
// Note: input_layouts has get_exact_mipmap_count() entries for a non-cubemap texture, and 6 * get_exact_mipmap_count() for a cubemap.
// Thus, for a non-cubemap texture, mip_level / mipmap_per_layer always rounds down to 0,
// and mip_level % mipmap_per_layer is always equal to mip_level.
if (!is_compressed_format(format))
{
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
mip_level++;
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
mip_level++;
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
{
__glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth);
if (!is_compressed_format(format))
{
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
}
//Compressed formats are block-based; dimensions are rounded up to the 4-texel block alignment
//TODO: Verify that samplers are not affected by the padding
width = align(width, 4);
height = align(height, 4);
}
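//Worked example of the block math used below (assuming 4x4-texel blocks; 8 bytes per DXT1 block, 16 for DXT23/45):
//a 30x30 DXT1 mip is padded to 32x32, i.e. 8x8 blocks, giving a compressed upload size of 8 * 8 * 8 = 512 bytes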
bool texture::mandates_expansion(u32 format)
{
/**
* If a texture behaves differently when uploaded directly vs when uploaded via the TextureUtils methods, it should be added here.
*/
if (format == CELL_GCM_TEXTURE_A1R5G5B5)
return true;
return false;
}
void texture::init(int index, rsx::fragment_texture& tex)
{
switch (tex.dimension())
{
case rsx::texture_dimension::dimension3d:
if (!tex.depth())
{
return;
}
case rsx::texture_dimension::dimension2d:
if (!tex.height())
{
return;
}
case rsx::texture_dimension::dimension1d:
if (!tex.width())
{
return;
}
break;
}
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
//We can't re-use texture handles if using immutable storage
if (m_id)
{
__glcheck remove();
}
__glcheck create();
__glcheck glActiveTexture(GL_TEXTURE0 + index);
bind();
if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
{
glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
{
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
{
// Note: input_layouts has get_exact_mipmap_count() entries for a non-cubemap texture, and 6 * get_exact_mipmap_count() for a cubemap.
// Thus, for a non-cubemap texture, mip_level / mipmap_per_layer always rounds down to 0,
// and mip_level % mipmap_per_layer is always equal to mip_level.
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
mip_level++;
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
mip_level++;
}
}
return;
}
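//Worked example of the face/mip indexing above, assuming mipmap_count = 3 (input_layouts then holds 6 * 3 = 18 entries):
//mip_level 0..2   -> face +X (0 / 3 == 0), mips 0..2
//mip_level 3..5   -> face -X (3 / 3 == 1), mips 0..2
//...
//mip_level 15..17 -> face -Z (15 / 3 == 5), mips 0..2
//i.e. mip_level / mipmap_count selects the face offset from GL_TEXTURE_CUBE_MAP_POSITIVE_X, and mip_level % mipmap_count the mip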
u32 full_format = tex.format();
u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
bool is_swizzled = !!(~full_format & CELL_GCM_TEXTURE_LN);
if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
{
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
}
}
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state)
{
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
size_t texture_data_sz = get_placed_texture_storage_size(width, height, depth, gcm_format, mipmaps, is_cubemap, 256, 512);
std::vector<gsl::byte> data_upload_buf(texture_data_sz);
__glcheck ::gl::pixel_pack_settings().apply();
__glcheck ::gl::pixel_unpack_settings().apply();
u32 aligned_pitch = tex.pitch();
size_t texture_data_sz = get_placed_texture_storage_size(tex, 256);
std::vector<gsl::byte> data_upload_buf(texture_data_sz);
u32 block_sz = get_pitch_modifier(format);
const std::array<GLenum, 4>& glRemap = get_swizzle_remap(gcm_format);
GLenum target;
GLenum remap_values[4];
switch (type)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
target = GL_TEXTURE_1D;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
target = GL_TEXTURE_2D;
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
target = GL_TEXTURE_3D;
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
target = GL_TEXTURE_CUBE_MAP;
break;
}
__glcheck glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
__glcheck create_and_fill_texture(tex.get_extended_texture_dimension(), tex.get_exact_mipmap_count(), format, tex.width(), tex.height(), tex.depth(), get_subresources_layout(tex), is_swizzled, data_upload_buf);
glBindTexture(target, id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
if (static_state)
{
//Usually for vertex textures
const std::array<GLenum, 4>& glRemap = get_swizzle_remap(format);
glTexParameteri(m_target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(m_target, GL_TEXTURE_MAX_LEVEL, tex.get_exact_mipmap_count() - 1);
auto decoded_remap = tex.decoded_remap();
glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, glRemap[0]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, glRemap[1]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, glRemap[2]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, glRemap[3]);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameterf(target, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.f);
}
else
{
//Remapping tables; format is A-R-G-B
//Remap input table. Contains channel index to read color from
const auto remap_inputs = decoded_remap.first;
@ -578,8 +503,6 @@ namespace rsx
//Remap control table. Controls whether the remap value is used, or force either 0 or 1
const auto remap_lookup = decoded_remap.second;
GLenum remap_values[4];
for (u8 channel = 0; channel < 4; ++channel)
{
switch (remap_lookup[channel])
@ -598,109 +521,14 @@ namespace rsx
}
}
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_A, remap_values[0]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_R, remap_values[1]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_G, remap_values[2]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_B, remap_values[3]);
//The rest of sampler state is now handled by sampler state objects
glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, remap_values[0]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, remap_values[1]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, remap_values[2]);
glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, remap_values[3]);
}
void texture::init(int index, rsx::vertex_texture& tex)
{
switch (tex.dimension())
{
case rsx::texture_dimension::dimension3d:
if (!tex.depth())
{
return;
}
case rsx::texture_dimension::dimension2d:
if (!tex.height())
{
return;
}
case rsx::texture_dimension::dimension1d:
if (!tex.width())
{
return;
}
break;
}
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
//We can't re-use texture handles if using immutable storage
if (m_id)
{
__glcheck remove();
}
__glcheck create();
__glcheck glActiveTexture(GL_TEXTURE0 + index);
bind();
u32 full_format = tex.format();
u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
bool is_swizzled = !!(~full_format & CELL_GCM_TEXTURE_LN);
__glcheck::gl::pixel_pack_settings().apply();
__glcheck::gl::pixel_unpack_settings().apply();
u32 aligned_pitch = tex.pitch();
size_t texture_data_sz = get_placed_texture_storage_size(tex, 256);
std::vector<gsl::byte> data_upload_buf(texture_data_sz);
u32 block_sz = get_pitch_modifier(format);
__glcheck glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
__glcheck create_and_fill_texture(tex.get_extended_texture_dimension(), tex.get_exact_mipmap_count(), format, tex.width(), tex.height(), tex.depth(), get_subresources_layout(tex), is_swizzled, data_upload_buf);
const std::array<GLenum, 4>& glRemap = get_swizzle_remap(format);
glTexParameteri(m_target, GL_TEXTURE_MAX_LEVEL, tex.get_exact_mipmap_count() - 1);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_A, glRemap[0]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_R, glRemap[1]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_G, glRemap[2]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_B, glRemap[3]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_S, GL_REPEAT);
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_T, GL_REPEAT);
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_R, GL_REPEAT);
__glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
__glcheck glTexParameteri(m_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
__glcheck glTexParameterf(m_target, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.f);
}
void texture::bind()
{
glBindTexture(m_target, m_id);
}
void texture::unbind()
{
glBindTexture(m_target, 0);
}
void texture::remove()
{
if (m_id)
{
glDeleteTextures(1, &m_id);
m_id = 0;
}
}
u32 texture::id() const
{
return m_id;
}
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, data_upload_buf);
}
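//Hypothetical call site (illustration only; the real caller is the shared texture cache, and tex/texaddr stand in for the
//fragment texture state and its translated address):
//GLuint id = create_texture(CELL_GCM_TEXTURE_A8R8G8B8, 256, 256, 1, 1, rsx::texture_dimension_extended::texture_dimension_2d);
//std::vector<rsx_subresource_layout> layouts = get_subresources_layout(tex);
//auto remap = tex.decoded_remap();
//upload_texture(id, texaddr, CELL_GCM_TEXTURE_A8R8G8B8, 256, 256, 1, 1, 1024, false,
//	rsx::texture_dimension_extended::texture_dimension_2d, layouts, remap, false);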
}

View File

@ -1,5 +1,6 @@
#include "OpenGL.h"
#include "../GCM.h"
#include "../Common/TextureUtils.h"
namespace rsx
{
@ -14,6 +15,11 @@ namespace gl
GLenum wrap_mode(rsx::texture_wrap_mode wrap);
float max_aniso(rsx::texture_max_anisotropy aniso);
GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type);
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state);
class sampler_state
{
GLuint samplerHandle = 0;
@ -38,42 +44,3 @@ namespace gl
void apply(rsx::fragment_texture& tex);
};
}
namespace rsx
{
namespace gl
{
class texture
{
u32 m_id = 0;
u32 m_target = GL_TEXTURE_2D;
public:
void create();
void init(int index, rsx::fragment_texture& tex);
void init(int index, rsx::vertex_texture& tex);
/**
* If a format is marked as mandating expansion, any request to upload the data to the GPU requires that the pixel data
* be decoded/expanded fully, regardless of whether the input is swizzled. Some formats behave differently when swizzled pixel data
* is decoded versus when data is fed directly; usually the byte order is not the same. Forcing decoding/expanding fixes this but costs performance.
*/
static bool mandates_expansion(u32 format);
/**
* The pitch modifier adjusts the pitch value supplied by the rsx::texture, providing a suitable divisor or 0 if no change is needed.
* The modified value, if any, is then supplied to GL as the UNPACK_ROW_LENGTH for the texture data.
*/
static u16 get_pitch_modifier(u32 format);
void bind();
void unbind();
void remove();
void set_target(u32 target) { m_target = target; }
void set_id(u32 id) { m_id = id; }
u32 id() const;
};
}
}
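//For reference, a sketch of how the removed modifier fed GL (assuming a linear source):
//if (const u16 block_sz = texture::get_pitch_modifier(format))
//	glPixelStorei(GL_UNPACK_ROW_LENGTH, tex.pitch() / block_sz);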

View File

@ -1,63 +0,0 @@
#include "stdafx.h"
#include "GLGSRender.h"
#include "GLTextureCache.h"
namespace gl
{
bool texture_cache::flush_section(u32 address)
{
if (address < no_access_range.first ||
address >= no_access_range.second)
return false;
bool post_task = false;
cached_texture_section* section_to_post = nullptr;
{
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
for (cached_texture_section &tex : no_access_memory_sections)
{
if (tex.is_dirty()) continue;
if (tex.is_locked() && tex.overlaps(address))
{
if (tex.is_flushed())
{
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", tex.get_section_base(), tex.get_section_size());
continue;
}
//LOG_WARNING(RSX, "Cell needs GPU data synced here, address=0x%X", address);
if (std::this_thread::get_id() != m_renderer_thread)
{
post_task = true;
section_to_post = &tex;
break;
}
tex.flush();
return true;
}
}
}
if (post_task)
{
//LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address);
work_item &task = m_renderer->post_flush_request(address, section_to_post);
vm::temporary_unlock();
{
std::unique_lock<std::mutex> lock(task.guard_mutex);
task.cv.wait(lock, [&task] { return task.processed; });
}
task.received = true;
return task.result;
}
return false;
}
}

File diff suppressed because it is too large

View File

@ -364,6 +364,15 @@ namespace rsx
return ((registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf);
}
std::pair<std::array<u8, 4>, std::array<u8, 4>> vertex_texture::decoded_remap() const
{
return
{
{ CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B },
{ CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP }
};
}
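//This is the identity mapping: vertex textures expose no remap control, so each output channel reads its own
//source channel (A<-A, R<-R, G<-G, B<-B) and every control slot is CELL_GCM_TEXTURE_REMAP_REMAP,
//i.e. "pass the source channel through" rather than forcing a constant 0 or 1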
u8 vertex_texture::zfunc() const
{
return ((registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf);

View File

@ -129,6 +129,8 @@ namespace rsx
u8 aniso_bias() const;
u8 signed_remap() const;
std::pair<std::array<u8, 4>, std::array<u8, 4>> decoded_remap() const;
// Control0
bool enabled() const;
u16 min_lod() const;

View File

@ -638,6 +638,9 @@ VKGSRender::VKGSRender() : GSRender()
m_current_frame = &frame_context_storage[0];
m_texture_cache.initialize((*m_device), m_memory_type_mapping, m_optimal_tiling_supported_formats, m_swap_chain->get_present_queue(),
m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
supports_multidraw = true;
}
@ -742,16 +745,18 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
{
if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer)
{
bool flushable, synchronized;
u64 sync_timestamp;
std::tie(flushable, synchronized, sync_timestamp) = m_texture_cache.address_is_flushable(address);
bool flushable;
vk::cached_texture_section* section;
std::tie(flushable, section) = m_texture_cache.address_is_flushable(address);
if (!flushable)
return false;
const u64 sync_timestamp = section->get_sync_timestamp();
const bool is_rsxthr = std::this_thread::get_id() == rsx_thread;
if (synchronized)
if (section->is_synchronized())
{
//Wait for any cb submitted after the sync timestamp to finish
while (true)
@ -841,7 +846,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
if (m_texture_cache.invalidate_range(address_base, size, false))
m_texture_cache.flush(true);
m_texture_cache.purge_dirty();
}
void VKGSRender::begin()
@ -1038,8 +1043,7 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts, m_memory_type_mapping,
m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
if (!texture0)
{
@ -1093,8 +1097,7 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts, m_memory_type_mapping,
m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
if (!texture0)
{
@ -1429,7 +1432,7 @@ void VKGSRender::copy_render_targets_to_dma_location()
if (!m_surface_info[index].pitch)
continue;
m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height,
m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, true,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
}
}
@ -1440,7 +1443,7 @@ void VKGSRender::copy_render_targets_to_dma_location()
if (m_depth_surface_info.pitch)
{
m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, true,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
}
}
@ -1529,7 +1532,7 @@ void VKGSRender::advance_queued_frames()
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.flush();
m_texture_cache.on_frame_end();
//Remove stale framebuffers. Ref counted to prevent use-after-free
m_framebuffers_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
@ -2179,7 +2182,7 @@ void VKGSRender::prepare_rtts()
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height);
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch);
}
}
@ -2192,7 +2195,7 @@ void VKGSRender::prepare_rtts()
const u32 range = pitch * m_depth_surface_info.height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height);
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch);
}
}
@ -2584,6 +2587,5 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
{
close_render_pass();
return m_texture_cache.upload_scaled_image(src, dst, interpolate, (*m_device), *m_current_command_buffer, m_memory_type_mapping,
m_swap_chain->get_present_queue(), m_rtts, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
}
return m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
}

View File

@ -1481,5 +1481,5 @@ namespace vk
*/
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer);
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer);
}

View File

@ -16,7 +16,7 @@ struct ref_counted
namespace vk
{
struct render_target : public image, public ref_counted
struct render_target : public image, public ref_counted, public rsx::render_target_descriptor<vk::image*>
{
bool dirty = false;
u16 native_pitch = 0;
@ -51,6 +51,31 @@ namespace vk
return view.get();
}
vk::image* get_surface() const override
{
return (vk::image*)this;
}
u16 get_surface_width() const override
{
return width();
}
u16 get_surface_height() const override
{
return height();
}
u16 get_rsx_pitch() const override
{
return rsx_pitch;
}
u16 get_native_pitch() const override
{
return native_pitch;
}
};
struct framebuffer_holder: public vk::framebuffer, public ref_counted

View File

@ -157,7 +157,7 @@ namespace vk
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer)
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer)
{
u32 mipmap_level = 0;
u32 block_in_pixel = get_format_block_size_in_texel(format);
@ -178,7 +178,7 @@ namespace vk
copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
copy_info.imageExtent.depth = layout.depth;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.aspectMask = flags;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
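//Callers now pass an aspect matching the image format instead of the previously hardcoded color aspect.
//Sketch (is_depth_format is a hypothetical helper; a buffer-image copy region takes exactly one aspect):
//const VkImageAspectFlags flags = is_depth_format(format) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;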

File diff suppressed because it is too large

View File

@ -21,6 +21,8 @@ namespace rsx
u16 pitch;
void *pixels;
bool compressed_x;
bool compressed_y;
u32 rsx_address;
};
@ -36,10 +38,14 @@ namespace rsx
u16 clip_y;
u16 clip_width;
u16 clip_height;
f32 scale_x;
f32 scale_y;
bool swizzled;
void *pixels;
bool compressed_x;
bool compressed_y;
u32 rsx_address;
};

View File

@ -593,6 +593,7 @@ namespace rsx
const s32 out_offset = out_x * out_bpp + out_pitch * out_y;
const tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);
const tiled_region dst_region = rsx->get_tiled_address(dst_offset + out_offset, dst_dma & 0xf);
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(get_address(dst_offset + out_offset, dst_dma));
@ -626,12 +627,19 @@ namespace rsx
u32 convert_h = (u32)(scale_y * in_h);
u32 slice_h = clip_h;
blit_src_info src_info = {};
blit_dst_info dst_info = {};
if (src_region.tile)
{
if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2)
switch(src_region.tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X2:
slice_h *= 2;
src_info.compressed_y = true;
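//fallthrough: 2X2-compressed tiles are also compressed horizontally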
case CELL_GCM_COMPMODE_C32_2X1:
src_info.compressed_x = true;
break;
}
u32 size = slice_h * in_pitch;
@ -643,20 +651,29 @@ namespace rsx
}
}
if (dst_region.tile)
{
switch (dst_region.tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X2:
dst_info.compressed_y = true;
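//fallthrough: 2X2 compression implies horizontal compression as well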
case CELL_GCM_COMPMODE_C32_2X1:
dst_info.compressed_x = true;
break;
}
}
if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)
{
//For now, only use this for actual scaled images; there are use cases that should not go through the 3D engine, e.g. program ucode transfers
//TODO: Figure out more instances where we can use this without problems
blit_src_info src_info;
blit_dst_info dst_info;
//NOTE: In cases where slice_h is modified due to compression (read from tiled memory), the new value (clip_h * 2) does not matter if memory is on the GPU
src_info.format = src_color_format;
src_info.origin = in_origin;
src_info.width = in_w;
src_info.height = in_h;
src_info.pitch = in_pitch;
src_info.slice_h = slice_h;
src_info.slice_h = clip_h;
src_info.offset_x = (u16)in_x;
src_info.offset_y = (u16)in_y;
src_info.pixels = pixels_src;
@ -672,6 +689,8 @@ namespace rsx
dst_info.offset_x = out_x;
dst_info.offset_y = out_y;
dst_info.pitch = out_pitch;
dst_info.scale_x = scale_x;
dst_info.scale_y = scale_y;
dst_info.pixels = pixels_dst;
dst_info.rsx_address = get_address(dst_offset, dst_dma);
dst_info.swizzled = (method_registers.blit_engine_context_surface() == blit_engine::context_surface::swizzle2d);

View File

@ -61,6 +61,9 @@
<Import Project="..\rpcs3_llvm.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">
<ClCompile />
</ItemDefinitionGroup>
<ItemGroup>
<ProjectReference Include="emucore.vcxproj">
<Project>{c4a10229-4712-4bd2-b63e-50d93c67a038}</Project>
@ -84,7 +87,6 @@
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
@ -95,4 +97,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -10,7 +10,6 @@
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />

View File

@ -90,9 +90,9 @@
</ClCompile>
<ClCompile Include="..\Utilities\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\LUrlParser.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\LUrlParser.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\Config.cpp" />
<ClCompile Include="..\Utilities\mutex.cpp">
@ -647,6 +647,7 @@
<ClInclude Include="Emu\Memory\wait_engine.h" />
<ClInclude Include="Emu\RSX\Common\GLSLCommon.h" />
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache.h" />
<ClInclude Include="Emu\RSX\gcm_enums.h" />
<ClInclude Include="Emu\RSX\gcm_printing.h" />
<ClInclude Include="Emu\RSX\rsx_cache.h" />

View File

@ -929,6 +929,9 @@
<ClCompile Include="Emu\Cell\lv2\sys_gamepad.cpp">
<Filter>Emu\Cell\lv2</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\LUrlParser.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -1795,5 +1798,8 @@
<ClInclude Include="Emu\Cell\lv2\sys_gamepad.h">
<Filter>Emu\Cell\lv2</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
</Project>