gl: Minor optimizations

rsx: Texture cache - improvements to locking
rsx: Minor optimizations to get_current_vertex_program and begin-end batch flushes
rsx: Optimize texture cache storage
- Manages storage in blocks of 16MB (a sketch of the bucketing follows the commit metadata below)
rsx/vk/gl: Fix swizzled texture input
gl: Hotfix for compressed texture formats
kd-11 2017-09-14 14:37:14 +03:00
parent e37a2a8f7d
commit 45d0e821dc
9 changed files with 372 additions and 267 deletions
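
The storage change buckets cached sections into 16 MiB blocks keyed by their base address (see get_block_size/get_block_address in the first file below). A minimal sketch of the bucketing arithmetic, assuming the project's u32/u64 aliases; block_walk is a hypothetical helper for illustration and is not part of the commit:

#include <cstdint>
using u32 = std::uint32_t; //matches the project-wide alias
using u64 = std::uint64_t;

constexpr u32 block_size = 0x1000000; //16 MiB, the same constant as get_block_size()
inline u32 block_address(u32 address) { return address & ~0xFFFFFFu; } //as get_block_address()

//A memory range can straddle block boundaries, so a scan must visit every block it touches.
template <typename F>
void block_walk(u32 address, u32 range, F&& visit)
{
    //widen to u64 so address + range cannot wrap around
    for (u64 block = block_address(address); block < u64{address} + range; block += block_size)
        visit(static_cast<u32>(block)); //e.g. key into the unordered_map of ranged storage
}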

View File

@@ -14,6 +14,12 @@ namespace rsx
swapped_native_component_order = 2,
};
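//Records how a section entered the cache; blit engine sources have their swizzle handled differently (see the upload_image_from_cpu overrides further down)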
enum texture_upload_context
{
shader_read = 0,
blit_engine_src = 1
};
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
class texture_cache
{
@@ -34,12 +40,14 @@ namespace rsx
void notify(u32 data_size)
{
verify(HERE), valid_count >= 0;
max_range = std::max(data_size, max_range);
valid_count++;
}
void add(section_storage_type& section, u32 data_size)
{
verify(HERE), valid_count >= 0;
max_range = std::max(data_size, max_range);
valid_count++;
@@ -65,7 +73,7 @@ namespace rsx
std::unordered_map<u32, framebuffer_memory_characteristics> m_cache_miss_statistics_table;
//Memory usage
const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated
const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated
s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory
/* Helpers */
@@ -74,11 +82,141 @@ namespace rsx
virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0;
virtual section_storage_type* create_new_texture(commandbuffer_type&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format,
const rsx::texture_dimension_extended type, const texture_create_flags flags, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format, const texture_upload_context context,
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
virtual void enforce_surface_creation_type(section_storage_type& section, const texture_create_flags expected) = 0;
virtual void insert_texture_barrier() = 0;
private:
//Internal implementation methods
bool invalidate_range_impl(u32 address, u32 range, bool unprotect)
{
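//Scan every block overlapping the trampled range; when a hit widens the range,
//restart from the first block so earlier sections are re-tested against the new bounds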
bool response = false;
u32 last_dirty_block = 0;
std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (base == last_dirty_block && range_data.valid_count == 0)
continue;
if (trampled_range.first >= (base + get_block_size()) || base >= trampled_range.second)
continue;
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
if (unprotect)
{
tex.set_dirty(true);
tex.unprotect();
}
else
{
tex.discard();
}
m_unreleased_texture_objects++;
range_data.valid_count--;
response = true;
}
}
if (range_reset)
{
last_dirty_block = base;
It = m_cache.begin();
}
}
return response;
}
template <typename ...Args>
bool flush_address_impl(u32 address, Args&&... extras)
{
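//Same restartable scan as invalidate_range_impl, but flushable sections are written back to CPU memory instead of being discarded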
bool response = false;
u32 last_dirty_block = 0;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (base == last_dirty_block && range_data.valid_count == 0)
continue;
if (trampled_range.first >= (base + get_block_size()) || base >= trampled_range.second)
continue;
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
//TODO: Map basic host_visible memory without coherent constraint
if (!tex.flush(std::forward<Args>(extras)...))
{
//Missed address, note this
//TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(tex);
}
response = true;
range_data.valid_count--;
}
}
if (range_reset)
{
It = m_cache.begin();
}
}
return response;
}
constexpr u32 get_block_size() const { return 0x1000000; }
inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); }
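//e.g. get_block_address(0x0C123456) == 0x0C000000; a 16 MiB block spans 0x0C000000..0x0CFFFFFF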
public:
texture_cache() {}
@@ -93,7 +231,9 @@ namespace rsx
auto test = std::make_pair(rsx_address, range);
for (auto &address_range : m_cache)
{
if (address_range.second.valid_count == 0) continue;
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (tex.get_section_base() > rsx_address)
@@ -109,7 +249,7 @@ namespace rsx
section_storage_type *find_texture_from_dimensions(u32 rsx_address, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
auto found = m_cache.find(rsx_address);
auto found = m_cache.find(get_block_address(rsx_address));
if (found != m_cache.end())
{
auto &range_data = found->second;
@@ -127,10 +267,9 @@ namespace rsx
section_storage_type& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
{
reader_lock lock(m_cache_mutex);
const u32 block_address = get_block_address(rsx_address);
auto found = m_cache.find(rsx_address);
auto found = m_cache.find(block_address);
if (found != m_cache.end())
{
auto &range_data = found->second;
@@ -166,20 +305,15 @@ namespace rsx
}
}
}
}
writer_lock lock(m_cache_mutex);
section_storage_type tmp;
m_cache[rsx_address].add(tmp, rsx_size);
return m_cache[rsx_address].data.back();
m_cache[block_address].add(tmp, rsx_size);
return m_cache[block_address].data.back();
}
section_storage_type* find_flushable_section(const u32 address, const u32 range)
{
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
auto found = m_cache.find(get_block_address(address));
if (found != m_cache.end())
{
auto &range_data = found->second;
@@ -199,9 +333,8 @@ namespace rsx
template <typename ...Args>
void lock_memory_region(image_storage_type* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height, const u32 pitch, Args&&... extras)
{
section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
writer_lock lock(m_cache_mutex);
section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
if (!region.is_locked())
{
@@ -217,6 +350,7 @@ namespace rsx
template <typename ...Args>
bool flush_memory_to_cache(const u32 memory_address, const u32 memory_size, bool skip_synchronized, Args&&... extra)
{
writer_lock lock(m_cache_mutex);
section_storage_type* region = find_flushable_section(memory_address, memory_size);
//TODO: Make this an assertion
@@ -236,6 +370,7 @@ namespace rsx
template <typename ...Args>
bool load_memory_from_cache(const u32 memory_address, const u32 memory_size, Args&&... extras)
{
reader_lock lock(m_cache_mutex);
section_storage_type *region = find_flushable_section(memory_address, memory_size);
if (region && !region->is_dirty())
@@ -256,7 +391,7 @@ namespace rsx
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
auto found = m_cache.find(get_block_address(address));
if (found != m_cache.end())
{
auto &range_data = found->second;
@@ -304,74 +439,8 @@ namespace rsx
address > no_access_range.second)
return false;
bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::unordered_map<u32, bool> processed_ranges;
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
(lock_base > address || lock_limit <= address))
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
//TODO: Map basic host_visible memory without coherent constraint
if (!tex.flush(std::forward<Args>(extras)...))
{
//Missed address, note this
//TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(tex);
}
response = true;
}
}
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response;
return flush_address_impl(address, std::forward<Args>(extras)...);
}
bool invalidate_address(u32 address)
@@ -392,76 +461,8 @@ namespace rsx
return false;
}
bool response = false;
std::unordered_map<u32, bool> processed_ranges;
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{
auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue;
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
auto overlapped = tex.overlaps_page(trampled_range, address);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second)
{
i = 0;
trampled_range = new_range;
range_reset = true;
}
if (unprotect)
{
tex.set_dirty(true);
tex.unprotect();
}
else
{
tex.discard();
}
m_unreleased_texture_objects++;
range_data.valid_count--;
response = true;
}
}
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response;
return invalidate_range_impl(address, range, unprotect);
}
void record_cache_miss(section_storage_type &tex)
@@ -521,6 +522,8 @@ namespace rsx
void purge_dirty()
{
writer_lock lock(m_cache_mutex);
//Reclaims all graphics memory consumed by dirty textures
std::vector<u32> empty_addresses;
empty_addresses.resize(32);
@@ -611,6 +614,17 @@ namespace rsx
return texptr->get_view();
}
{
//Search in cache and upload/bind
reader_lock lock(m_cache_mutex);
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height);
if (cached_texture)
{
return cached_texture->get_raw_view();
}
}
/* Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise
* (Turbo: Super Stunt Squad does this; bypassing the need for a sync object)
* The engine does not read back the texture resource through cell, but specifies a texture location that is
@@ -664,15 +678,6 @@ namespace rsx
}
}
//If all the above failed, then it's probably a generic texture.
//Search in cache and upload/bind
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height);
if (cached_texture)
{
return cached_texture->get_raw_view();
}
//Do direct upload from CPU as the last resort
const auto extended_dimension = tex.get_extended_texture_dimension();
u16 height = 0;
@@ -698,12 +703,13 @@ namespace rsx
break;
}
writer_lock lock(m_cache_mutex);
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
auto subresources_layout = get_subresources_layout(tex);
auto remap_vector = tex.decoded_remap();
return upload_image_from_cpu(cmd, texaddr, tex_width, height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
}
template <typename surface_store_type, typename blitter_type, typename ...Args>
@@ -770,7 +776,9 @@ namespace rsx
}
}
reader_lock lock(m_cache_mutex);
section_storage_type* cached_dest = nullptr;
if (!dst_is_render_target)
{
//First check if this surface exists in VRAM with exact dimensions
@@ -785,7 +793,7 @@ namespace rsx
//Prep surface
enforce_surface_creation_type(*cached_dest, dst.swizzled ? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order);
//TODO: Move this code into utils since it is used a lot
const auto old_dst_area = dst_area;
if (const u32 address_offset = dst.rsx_address - cached_dest->get_section_base())
{
const u16 bpp = dst_is_argb8 ? 4 : 2;
@@ -809,11 +817,16 @@ namespace rsx
max_dst_height = cached_dest->get_height();
}
else
{
cached_dest = nullptr;
dst_area = old_dst_area;
}
}
if (!cached_dest && is_memcpy)
{
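//Writer access is needed from here: the range is invalidated and host memory is written directly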
lock.upgrade();
invalidate_range_impl(dst_address, memcpy_bytes_length, true);
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
@@ -839,6 +852,8 @@ namespace rsx
if (rsx_pitch <= 64 && native_pitch != rsx_pitch)
{
lock.upgrade();
invalidate_range_impl(dst_address, memcpy_bytes_length, true);
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
@@ -856,7 +871,9 @@ namespace rsx
}
else
{
flush_address(src.rsx_address, std::forward<Args>(extras)...);
lock.upgrade();
flush_address_impl(src_address, std::forward<Args>(extras)...);
const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
std::vector<rsx_subresource_layout> subresource_layout;
@@ -869,7 +886,7 @@ namespace rsx
subresource_layout.push_back(subres);
const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format,
vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src,
subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture();
}
}
@@ -928,7 +945,8 @@ namespace rsx
//TODO: Check for other types of format mismatch
if (format_mismatch)
{
invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size());
lock.upgrade();
invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), true);
dest_texture = 0;
cached_dest = nullptr;
@@ -958,6 +976,8 @@ namespace rsx
else
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
lock.upgrade();
dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst.clip_height,
dst_dimensions.width, dst_dimensions.height, 1, 1,
gcm_format, rsx::texture_dimension_extended::texture_dimension_2d,

View File

@@ -462,14 +462,15 @@ void GLGSRender::end()
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
if (!rsx::method_registers.fragment_textures[i].enabled())
continue;
if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
if (rsx::method_registers.fragment_textures[i].enabled() && m_program->uniforms.has_location("tex" + std::to_string(i), &location))
{
m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts);
if (m_textures_dirty[i])
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
}
m_textures_dirty[i] = false;
}
//Vertex textures

View File

@@ -186,8 +186,22 @@ void GLGSRender::init_buffers(bool skip_reading)
draw_fbo.recreate();
bool old_format_found = false;
gl::texture::format old_format;
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
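//When write_color_buffers is enabled, write back surfaces whose memory is statistically likely to be read (per the cache-miss statistics) before the framebuffer is recreated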
if (surface_info[i].pitch && g_cfg.video.write_color_buffers)
{
if (!old_format_found)
{
old_format = rsx::internals::surface_color_format_to_gl(surface_info[i].color_format).format;
old_format_found = true;
}
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_info[i].address, surface_info[i].pitch * surface_info[i].height);
}
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
{
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);

View File

@@ -36,7 +36,7 @@ namespace gl
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
}
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
fmt::throw_exception("Unknown texture format 0x%x" HERE, texture_format);
}
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
@@ -63,6 +63,9 @@ namespace gl
case CELL_GCM_TEXTURE_D1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return std::make_tuple(GL_RG, GL_HALF_FLOAT);
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE);
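//For compressed formats the 'format' slot carries the compressed internal format; GL_UNSIGNED_BYTE is effectively a placeholder, since compressed uploads take an image size rather than a type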
}
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
@@ -333,7 +336,7 @@ namespace gl
}
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, std::vector<gsl::byte> staging_buffer)
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<gsl::byte> staging_buffer)
{
int mip_level = 0;
if (is_compressed_format(format))
@@ -349,11 +352,10 @@ namespace gl
glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
}
}
else
@@ -362,7 +364,7 @@ namespace gl
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, gl_format, size, staging_buffer.data());
}
}
return;
@@ -372,11 +374,10 @@ namespace gl
{
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
}
}
else
@@ -385,7 +386,7 @@ namespace gl
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
}
}
return;
@@ -398,11 +399,10 @@ namespace gl
// mip_level % mipmap_per_layer will always be equal to mip_level
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
mip_level++;
}
}
@@ -412,7 +412,7 @@ namespace gl
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
mip_level++;
}
}
@@ -423,11 +423,10 @@ namespace gl
{
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
}
}
else
@@ -436,7 +435,7 @@ namespace gl
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data());
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, gl_format, size, staging_buffer.data());
}
}
return;
@@ -529,6 +528,9 @@ namespace gl
//The rest of sampler state is now handled by sampler state objects
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, data_upload_buf);
const auto format_type = get_format_type(gcm_format);
const GLenum gl_format = std::get<0>(format_type);
const GLenum gl_type = std::get<1>(format_type);
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
}
}

View File

@@ -17,6 +17,14 @@ namespace gl
GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type);
/**
* is_swizzled - determines whether input bytes are in Morton order
* subresources_layout - descriptor of the mipmap levels in memory
* decoded_remap - two vectors, first one contains the index to read, e.g. if v[0] = 1 then component 0[A] in the texture should read as component 1[R]
* - layout of vector is in A-R-G-B
* - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture
* static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX)
*/
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state);
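
A hedged illustration of the decoded_remap layout documented above; the override encoding is not shown in this header, so the second vector's values are assumptions for the example:

#include <array>
#include <cstdint>
#include <utility>
using u8 = std::uint8_t; //project-wide alias

//Component indices are in A-R-G-B order, as described above
std::pair<std::array<u8, 4>, std::array<u8, 4>> remap =
{
    { 1, 2, 3, 0 }, //A samples stored R, R samples G, G samples B, B samples A
    { 0, 0, 0, 0 }  //assumed encoding: 0 = read from the texture, no forced 0/1 constant
};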

View File

@@ -47,6 +47,8 @@ namespace gl
texture::type type = texture::type::ubyte;
bool pack_unpack_swap_bytes = false;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
u8 get_pixel_size(texture::format fmt_, texture::type type_)
{
u8 size = 1;
@@ -224,6 +226,11 @@ namespace gl
vram_texture = source.id();
}
void set_view_flags(const rsx::texture_create_flags flags)
{
view_flags = flags;
}
void copy_texture(bool=false)
{
if (!glIsTexture(vram_texture))
@@ -306,7 +313,6 @@ namespace gl
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
protect(utils::protection::ro);
return true;
}
@@ -410,6 +416,11 @@ namespace gl
return (gl::texture::format)fmt == tex->get_internal_format();
}
rsx::texture_create_flags get_view_flags() const
{
return view_flags;
}
};
class texture_cache : public rsx::texture_cache<void*, cached_texture_section, u32, u32, gl::texture, gl::texture::format>
@@ -577,28 +588,54 @@ namespace gl
break;
}
if (flags == rsx::texture_create_flags::swapped_native_component_order)
{
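//With a swapped view, a shader read of R fetches the stored A channel, G fetches R, and so on (ARGB data sampled through an RGBA view)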
glBindTexture(GL_TEXTURE_2D, vram_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_ALPHA);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_RED);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_GREEN);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_BLUE);
}
auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height);
cached.protect(utils::protection::ro);
cached.set_dirty(false);
cached.set_depth_flag(depth_flag);
cached.set_view_flags(flags);
return &cached;
}
cached_texture_section* upload_image_from_cpu(void*&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
const rsx::texture_upload_context context, std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) override
{
void* unused = nullptr;
auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type,
rsx::texture_create_flags::default_component_order, remap_vector);
gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, swizzled, type, subresource_layout, remap_vector, false);
//Swizzling is ignored for blit engine copy and emulated using remapping
bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src)? false : swizzled;
gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, input_swizzled, type, subresource_layout, remap_vector, false);
return section;
}
void enforce_surface_creation_type(cached_texture_section& section, const rsx::texture_create_flags flags) override
{
if (flags == section.get_view_flags())
return;
if (flags == rsx::texture_create_flags::swapped_native_component_order)
{
glBindTexture(GL_TEXTURE_2D, section.get_raw_texture());
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_ALPHA);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_RED);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_GREEN);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_BLUE);
}
section.set_view_flags(flags);
}
void insert_texture_barrier() override
@@ -630,6 +667,8 @@ namespace gl
bool is_depth_texture(const u32 rsx_address) override
{
reader_lock lock(m_cache_mutex);
auto section = find_texture_from_range(rsx_address, 64u);
if (section != nullptr) return section->is_depth_texture();

View File

@@ -403,7 +403,8 @@ namespace rsx
std::vector <std::pair<u32, u32>> split_ranges;
auto first_count_cmds = method_registers.current_draw_clause.first_count_commands;
if (method_registers.current_draw_clause.first_count_commands.size() > 1)
if (method_registers.current_draw_clause.first_count_commands.size() > 1 &&
method_registers.current_draw_clause.is_disjoint_primitive)
{
u32 next = method_registers.current_draw_clause.first_count_commands.front().first;
u32 last_head = 0;
@@ -433,13 +434,18 @@ namespace rsx
{
std::vector<std::pair<u32, u32>> tmp;
auto list_head = first_count_cmds.begin();
bool emit_begin = false;
for (auto &range : split_ranges)
{
tmp.resize(range.second - range.first + 1);
std::copy(list_head + range.first, list_head + range.second, tmp.begin());
if (emit_begin)
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);
else
emit_begin = true;
method_registers.current_draw_clause.first_count_commands = tmp;
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
}
@@ -565,8 +571,14 @@ namespace rsx
deferred_primitive_type = value;
else
{
has_deferred_call = true;
flush_commands_flag = false;
execute_method_call = false;
deferred_call_size++;
if (method_registers.current_draw_clause.is_disjoint_primitive)
{
// Combine all calls since the last one
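// e.g. contiguous ranges (first=0, count=100) and (first=100, count=50) can collapse into one (0, 150) entry (assumed behaviour; the merge body is partly elided in this hunk)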
auto &first_count = method_registers.current_draw_clause.first_count_commands;
if (first_count.size() > deferred_call_size)
@@ -596,10 +608,7 @@ namespace rsx
first_count[deferred_call_size - 1].second = count;
first_count.resize(deferred_call_size);
}
has_deferred_call = true;
flush_commands_flag = false;
execute_method_call = false;
}
}
break;
@@ -1049,24 +1058,33 @@ namespace rsx
void thread::get_current_vertex_program()
{
auto &result = current_vertex_program = {};
const u32 transform_program_start = rsx::method_registers.transform_program_start();
result.data.reserve((512 - transform_program_start) * 4);
result.rsx_vertex_inputs.reserve(rsx::limits::vertex_count);
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = false;
current_vertex_program.rsx_vertex_inputs.resize(0);
current_vertex_program.data.resize(512 * 4);
current_vertex_program.rsx_vertex_inputs.reserve(rsx::limits::vertex_count);
u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
u32* ucode_dst = current_vertex_program.data.data();
u32 ucode_size = 0;
D3 d3;
for (int i = transform_program_start; i < 512; ++i)
{
result.data.resize((i - transform_program_start) * 4 + 4);
memcpy(result.data.data() + (i - transform_program_start) * 4, rsx::method_registers.transform_program.data() + i * 4, 4 * sizeof(u32));
D3 d3;
d3.HEX = rsx::method_registers.transform_program[i * 4 + 3];
ucode_size += 4;
memcpy(ucode_dst, ucode_src, 4 * sizeof(u32));
d3.HEX = ucode_src[3];
if (d3.end)
break;
ucode_src += 4;
ucode_dst += 4;
}
result.output_mask = rsx::method_registers.vertex_attrib_output_mask();
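//The buffer was over-allocated to the full 512-instruction program space; shrink it to the ucode actually copied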
current_vertex_program.data.resize(ucode_size);
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@@ -1079,7 +1097,7 @@ namespace rsx
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
{
result.rsx_vertex_inputs.push_back(
current_vertex_program.rsx_vertex_inputs.push_back(
{index,
rsx::method_registers.vertex_arrays_info[index].size(),
rsx::method_registers.vertex_arrays_info[index].frequency(),
@@ -1089,7 +1107,7 @@ namespace rsx
}
else if (vertex_push_buffers[index].vertex_count > 1)
{
result.rsx_vertex_inputs.push_back(
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.register_vertex_info[index].size,
1,
@@ -1099,7 +1117,7 @@ namespace rsx
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
result.rsx_vertex_inputs.push_back(
current_vertex_program.rsx_vertex_inputs.push_back(
{index,
rsx::method_registers.register_vertex_info[index].size,
rsx::method_registers.register_vertex_info[index].frequency,

View File

@@ -565,7 +565,7 @@ namespace vk
}
cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
const rsx::texture_upload_context context, std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) override
{
auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type,
@@ -578,7 +578,10 @@ namespace vk
vk::enter_uninterruptible();
vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, swizzled, mipmaps, subres_range.aspectMask,
//Swizzling is ignored for blit engine copy and emulated using a swapped order image view
bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src) ? false : swizzled;
vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask,
*m_texture_upload_heap, m_texture_upload_buffer);
vk::leave_uninterruptible();

View File

@@ -70,7 +70,7 @@ namespace rsx
bool locked = false;
bool dirty = false;
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) const
{
return (base1 < limit2 && base2 < limit1);
}
@@ -133,12 +133,12 @@ namespace rsx
locked = false;
}
bool overlaps(std::pair<u32, u32> range)
bool overlaps(std::pair<u32, u32> range) const
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
bool overlaps(u32 address)
bool overlaps(u32 address) const
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
@@ -148,7 +148,7 @@ namespace rsx
* ignore_protection_range - if true, the test does not check against the aligned protection range;
* it tests against the actual range of contents in memory instead
*/
bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range)
bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range) const
{
if (!ignore_protection_range)
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
@@ -160,7 +160,7 @@ namespace rsx
* Check if the page containing the address tramples this section, taking a previously trampled page range into account.
* If true, returns the range <min, max> with the updated invalid range
*/
std::tuple<bool, std::pair<u32, u32>> overlaps_page(std::pair<u32, u32> old_range, u32 address)
std::tuple<bool, std::pair<u32, u32>> overlaps_page(std::pair<u32, u32> old_range, u32 address) const
{
const u32 page_base = address & ~4095;
const u32 page_limit = address + 4096;
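//page_base is the start of the 4 KiB page containing the address; note page_limit extends one full page past the address itself, not past page_base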
@@ -204,7 +204,7 @@ namespace rsx
return (cpu_address_base == cpu_address && cpu_address_range == size);
}
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max) const
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);