rsx: Optimizations

- Reimplement fragment program fetch and rewrite texture upload mechanism
-- All of these steps should only be done at most once per draw call
-- Eliminates continuously checking the surface store for overlapping addresses as well

addenda - critical fixes
- gl: Bind TIU before starting texture operations as they will affect the currently bound texture
- vk: Reuse sampler objects if possible
- rsx: Support for depth resampling for depth textures obtained via blit engine

vk/rsx: Minor fixes
- Fix accidental imageview dereference when using WCB if texture memory occupies FB memory
- Invalidate dirty framebuffers (strict mode only)
- Normalize line endings because VS is dumb
This commit is contained in:
kd-11 2017-10-30 15:27:22 +03:00
parent 865bb47462
commit 173d05b54f
15 changed files with 549 additions and 253 deletions

View File

@ -522,6 +522,12 @@ namespace rsx
*/
void invalidate_surface_address(u32 addr, bool depth)
{
if (address_is_bound(addr, depth))
{
LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!");
return;
}
if (!depth)
{
auto It = m_render_targets_storage.find(addr);

View File

@ -25,6 +25,14 @@ namespace rsx
framebuffer_storage = 3
};
//Sampled image descriptor
//Backend-agnostic description of a sampled image: where the data came from,
//whether it holds depth values, and the horizontal scale factor needed to map
//the requested pitch onto the surface's native pitch.
struct sampled_image_descriptor_base
{
	texture_upload_context upload_context{ texture_upload_context::shader_read };
	bool is_depth_texture{ false };
	f32 internal_scale{ 1.f };
};
struct cached_texture_section : public rsx::buffered_section
{
u16 width;
@ -204,6 +212,22 @@ namespace rsx
u32 address_range = 0;
};
//Fully-resolved sampled image: the descriptor base plus the API-level image
//view handle to bind. A default-constructed instance (null handle) denotes
//"no texture bound".
struct sampled_image_descriptor : public sampled_image_descriptor_base
{
	image_view_type image_handle = 0;

	//Rely on the in-class member initializers; no user-provided body needed
	sampled_image_descriptor() = default;

	sampled_image_descriptor(image_view_type handle, const texture_upload_context ctx, const bool is_depth, const f32 scale)
	{
		image_handle = handle;
		upload_context = ctx;
		is_depth_texture = is_depth;
		internal_scale = scale;
	}
};
private:
//Internal implementation methods and helpers
@ -567,6 +591,15 @@ namespace rsx
region.set_dirty(false);
no_access_range = region.get_min_max(no_access_range);
}
else
{
if (region.get_context() != texture_upload_context::framebuffer_storage)
{
//This space was being used for other purposes other than framebuffer storage
//Delete used resources before attaching it to framebuffer memory
free_texture_section(region);
}
}
region.protect(utils::protection::no);
region.create(width, height, 1, 1, nullptr, image, pitch, false, std::forward<Args>(extras)...);
@ -831,7 +864,7 @@ namespace rsx
}
template <typename RsxTextureType, typename surface_store_type, typename ...Args>
image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras)
sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras)
{
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 tex_size = (u32)get_texture_size(tex);
@ -841,7 +874,7 @@ namespace rsx
if (!texaddr || !tex_size)
{
LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, tex_size);
return 0;
return {};
}
const auto extended_dimension = tex.get_extended_texture_dimension();
@ -857,6 +890,7 @@ namespace rsx
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
LOG_ERROR(RSX, "Texture resides in render target memory, but requested type is not 2D (%d)", (u32)extended_dimension);
f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch();
for (const auto& tex : m_rtts.m_bound_render_targets)
{
if (std::get<0>(tex) == texaddr)
@ -864,7 +898,7 @@ namespace rsx
if (g_cfg.video.strict_rendering_mode)
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, false, internal_scale };
}
else
{
@ -875,8 +909,10 @@ namespace rsx
}
}
return texptr->get_view();
return{ texptr->get_view(), texture_upload_context::framebuffer_storage, false, internal_scale };
}
else
m_rtts.invalidate_surface_address(texaddr, false);
}
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
@ -886,12 +922,13 @@ namespace rsx
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
LOG_ERROR(RSX, "Texture resides in depth buffer memory, but requested type is not 2D (%d)", (u32)extended_dimension);
f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch();
if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil))
{
if (g_cfg.video.strict_rendering_mode)
{
LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr);
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, true, internal_scale };
}
else
{
@ -900,8 +937,10 @@ namespace rsx
}
}
return texptr->get_view();
return{ texptr->get_view(), texture_upload_context::framebuffer_storage, true, internal_scale };
}
else
m_rtts.invalidate_surface_address(texaddr, true);
}
}
@ -945,10 +984,14 @@ namespace rsx
const u32 internal_width = (const u32)(tex_width * internal_scale);
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true);
if (rsc.surface/* && test_framebuffer(texaddr)*/)
if (rsc.surface)
{
//TODO: Check that this region is not cpu-dirty before doing a copy
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
if (!test_framebuffer(texaddr))
{
m_rtts.invalidate_surface_address(texaddr, rsc.is_depth_surface);
}
else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
{
LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d",
texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height());
@ -965,16 +1008,16 @@ namespace rsx
insert_texture_barrier();
}
return rsc.surface->get_view();
return{ rsc.surface->get_view(), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
}
else return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true));
else return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
}
else
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true));
return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
}
}
}
@ -987,7 +1030,7 @@ namespace rsx
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height, depth);
if (cached_texture)
{
return cached_texture->get_raw_view();
return{ cached_texture->get_raw_view(), cached_texture->get_context(), cached_texture->is_depth_texture(), 1.f };
}
if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format))
@ -1030,7 +1073,7 @@ namespace rsx
auto src_image = surface->get_raw_texture();
if (auto result = create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height))
return result;
return{ result, texture_upload_context::blit_engine_dst, surface->is_depth_texture(), 1.f };
}
}
}
@ -1048,8 +1091,9 @@ namespace rsx
invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward<Args>(extras)...);
m_texture_memory_in_use += (tex_pitch * tex_height);
return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
return{ upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(),
texture_upload_context::shader_read, false, 1.f };
}
template <typename surface_store_type, typename blitter_type, typename ...Args>

View File

@ -54,7 +54,7 @@ void D3D12GSRender::load_program()
};
get_current_vertex_program();
get_current_fragment_program(rtt_lookup_func);
get_current_fragment_program_legacy(rtt_lookup_func);
if (!current_fragment_program.valid)
return;

View File

@ -347,6 +347,72 @@ void GLGSRender::end()
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();
//Load textures
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
std::lock_guard<std::mutex> lock(m_sampler_mutex);
void* unused = nullptr;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (m_samplers_dirty || m_textures_dirty[i])
{
if (!fs_sampler_state[i])
fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts);
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
GLenum target = get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]);
glBindTexture(target, sampler_state->image_handle);
}
else
{
*sampler_state = {};
}
m_textures_dirty[i] = false;
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
int texture_index = i + rsx::limits::fragment_textures_count;
if (m_samplers_dirty || m_vertex_textures_dirty[i])
{
if (!vs_sampler_state[i])
vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
if (rsx::method_registers.vertex_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + texture_index);
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts);
glBindTexture(GL_TEXTURE_2D, static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get())->image_handle);
}
else
*sampler_state = {};
m_vertex_textures_dirty[i] = false;
}
}
m_samplers_dirty.store(false);
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
}
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
@ -462,43 +528,6 @@ void GLGSRender::end()
glEnable(GL_SCISSOR_TEST);
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
//Setup textures
//Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
if (rsx::method_registers.fragment_textures[i].enabled() && m_program->uniforms.has_location("tex" + std::to_string(i), &location))
{
m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts);
if (m_textures_dirty[i])
{
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
m_textures_dirty[i] = false;
}
}
}
//Vertex textures
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
int texture_index = i + rsx::limits::fragment_textures_count;
int location;
if (!rsx::method_registers.vertex_textures[i].enabled())
continue;
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
{
m_gl_texture_cache.upload_and_bind_texture(texture_index, GL_TEXTURE_2D, rsx::method_registers.vertex_textures[i], m_rtts);
}
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
if (g_cfg.video.debug_output)
@ -952,44 +981,16 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
bool GLGSRender::check_program_state()
{
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
gl::render_target *surface = nullptr;
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
const bool dirty_framebuffer = (surface != nullptr && !m_gl_texture_cache.test_framebuffer(texaddr));
if (dirty_framebuffer || !surface)
{
if (is_depth && m_gl_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
return std::make_tuple(true, 0);
if (dirty_framebuffer)
return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);
surface = rsc.surface;
}
return std::make_tuple(true, surface->get_native_pitch());
};
get_current_fragment_program(rtt_lookup_func);
if (current_fragment_program.valid == false)
return false;
get_current_vertex_program();
return true;
return (rsx::method_registers.shader_program_address() != 0);
}
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
get_current_vertex_program();
auto &fragment_program = current_fragment_program;
auto &vertex_program = current_vertex_program;
@ -1061,7 +1062,7 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
}
m_transform_constants_dirty = false;
}
}
void GLGSRender::flip(int buffer)
{
@ -1228,6 +1229,11 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
if (!result.violation_handled)
return false;
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
if (result.num_flushable > 0)
{
work_item &task = post_flush_request(address, result);
@ -1249,7 +1255,13 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
//Discard all memory in that range without bothering with writeback (Force it for strict?)
if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled)
{
m_gl_texture_cache.purge_dirty();
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
}
void GLGSRender::do_local_task()
@ -1296,6 +1308,7 @@ void GLGSRender::synchronize_buffers()
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
m_samplers_dirty.store(true);
return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts);
}

View File

@ -310,33 +310,6 @@ struct driver_state
}
};
//Simple software ring buffer handing out transient scratch regions.
//NOTE: a returned pointer is only valid until the ring wraps back over it.
struct sw_ring_buffer
{
	std::vector<u8> data;
	u32 ring_pos = 0;
	u32 ring_length = 0;

	sw_ring_buffer(u32 size)
	{
		data.resize(size);
		ring_length = size;
	}

	//Returns a pointer to a contiguous scratch region of <dwords> 32-bit words
	void* get(u32 dwords)
	{
		const u32 required = (dwords << 2);

		//Grow the backing store if a single request exceeds the whole ring,
		//otherwise the caller would write past the end of the buffer
		if (required > ring_length)
		{
			data.resize(required);
			ring_length = required;
		}

		if ((ring_pos + required) > ring_length)
		{
			//Wrap around: hand out the start of the buffer and advance the
			//cursor past it so the next request cannot alias this region
			ring_pos = required;
			return data.data();
		}

		void *result = data.data() + ring_pos;
		ring_pos += required;
		return result;
	}
};
class GLGSRender : public GSRender
{
private:
@ -405,6 +378,11 @@ private:
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
std::mutex m_sampler_mutex;
std::atomic_bool m_samplers_dirty = {true};
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
public:
GLGSRender();

View File

@ -783,15 +783,5 @@ namespace gl
void* unused = nullptr;
return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
}
//Uploads the texture described by tex and binds the resulting view to the
//given texture image unit/target.
template<typename RsxTextureType>
void upload_and_bind_texture(int index, GLenum target, RsxTextureType &tex, gl_render_targets &m_rtts)
{
	//Select the TIU first; the bind below affects the currently active unit
	glActiveTexture(GL_TEXTURE0 + index);

	void* unused_cmd = nullptr; //GL backend takes no command buffer
	glBindTexture(target, upload_texture(unused_cmd, tex, m_rtts));
}
};
}

View File

@ -7,6 +7,7 @@
#include "Emu/Cell/PPUCallback.h"
#include "Common/BufferUtils.h"
#include "Common/texture_cache.h"
#include "rsx_methods.h"
#include "rsx_utils.h"
@ -243,6 +244,7 @@ namespace rsx
};
m_rtts_dirty = true;
memset(m_textures_dirty, -1, sizeof(m_textures_dirty));
memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty));
m_transform_constants_dirty = true;
}
@ -1321,7 +1323,108 @@ namespace rsx
return result;
}
void thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info)
//Builds current_fragment_program from the RSX method registers and the
//per-texture-unit sampler descriptors gathered during texture upload.
//Surface information (depth-ness, pitch scale) comes from the descriptors
//rather than a surface-store lookup callback as in the legacy path.
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{
auto &result = current_fragment_program = {};

const u32 shader_program = rsx::method_registers.shader_program_address();
//No fragment program bound; result stays invalid
if (shader_program == 0)
return;

//Low 2 bits select the memory location passed to get_address; the rest is the byte offset
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);

result.offset = program_offset;
result.addr = vm::base(rsx::get_address(program_offset, program_location));
result.valid = true;
result.ctrl = rsx::method_registers.shader_control();
result.unnormalized_coords = 0;
result.front_back_color_enabled = !rsx::method_registers.two_side_light_en();
result.back_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKDIFFUSE);
result.back_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKSPECULAR);
result.front_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE);
result.front_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR);
result.redirected_textures = 0;
result.shadow_textures = 0;

std::array<texture_dimension_extended, 16> texture_dimensions;
const auto resolution_scale = rsx::get_resolution_scale();

for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
auto &tex = rsx::method_registers.fragment_textures[i];
//Per-unit defaults; overridden below for enabled framebuffer-backed textures
result.texture_scale[i][0] = 1.f;
result.texture_scale[i][1] = 1.f;
result.textures_alpha_kill[i] = 0;
result.textures_zfunc[i] = 0;

if (!tex.enabled())
{
texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d;
}
else
{
texture_dimensions[i] = tex.get_extended_texture_dimension();

if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc();
if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
{
result.textures_alpha_kill[i] = 1;
result.textures_zfunc[i] = (u8)func;
}
}

//NOTE(review): texaddr is computed here but never read in this path - candidate for removal
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 raw_format = tex.format();

if (raw_format & CELL_GCM_TEXTURE_UN)
result.unnormalized_coords |= (1 << i);

//Framebuffer-backed textures may have a native pitch that differs from the
//requested pitch; bake the correction into the per-texture scale factors
if (sampler_descriptors[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)
{
if (raw_format & CELL_GCM_TEXTURE_UN)
{
result.texture_scale[i][0] = (resolution_scale * sampler_descriptors[i]->internal_scale);
result.texture_scale[i][1] = resolution_scale;
}
}

if (sampler_descriptors[i]->is_depth_texture)
{
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
//Depth data sampled through a color format; flag for shader-side redirection
result.redirected_textures |= (1 << i);
break;
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
{
//Depth format with an active compare function and no alphakill -> shadow lookup
const auto compare_mode = (rsx::comparison_function)tex.zfunc();
if (result.textures_alpha_kill[i] == 0 &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
result.shadow_textures |= (1 << i);
break;
}
default:
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
}
}

result.set_texture_dimension(texture_dimensions);
}
void thread::get_current_fragment_program_legacy(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info)
{
auto &result = current_fragment_program = {};
@ -1412,8 +1515,8 @@ namespace rsx
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
result.redirected_textures |= (1 << i);
break;
result.redirected_textures |= (1 << i);
break;
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
@ -1426,7 +1529,7 @@ namespace rsx
break;
}
default:
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
}

View File

@ -135,6 +135,8 @@ namespace rsx
std::array<attribute_buffer_placement, 16> attribute_placement;
};
struct sampled_image_descriptor_base;
class thread : public named_thread
{
std::shared_ptr<thread_ctrl> m_vblank_thread;
@ -188,6 +190,7 @@ namespace rsx
bool m_rtts_dirty;
bool m_transform_constants_dirty;
bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4];
protected:
std::array<u32, 4> get_color_surface_addresses() const;
@ -208,7 +211,9 @@ namespace rsx
* get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth
* returns whether surface is a render target and surface pitch in native format
*/
void get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info);
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
void get_current_fragment_program_legacy(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info);
public:
double fps_limit = 59.94;

View File

@ -712,6 +712,14 @@ VKGSRender::~VKGSRender()
m_rtts.destroy();
m_texture_cache.destroy();
//Sampler handles
for (auto& handle : fs_sampler_handles)
handle.reset();
for (auto& handle : vs_sampler_handles)
handle.reset();
//Overlay text handler
m_text_writer.reset();
//Pipeline descriptors
@ -750,6 +758,11 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (!result.violation_handled)
return false;
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
if (result.num_flushable > 0)
{
const bool is_rsxthr = std::this_thread::get_id() == rsx_thread;
@ -845,6 +858,23 @@ void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
*m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()).violation_handled)
{
m_texture_cache.purge_dirty();
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
}
void VKGSRender::notify_tile_unbound(u32 tile)
{
//TODO: Handle texture writeback
//u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
//on_notify_memory_unmapped(addr, tiles[tile].size);
//m_rtts.invalidate_surface_address(addr, false);
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
@ -995,8 +1025,6 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (!check_program_status())
{
@ -1005,24 +1033,152 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
//Close current pass to avoid conflict with texture functions
close_render_pass();
//Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = state_check_end;
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
std::chrono::time_point<steady_clock> textures_start = vertex_end;
//Load textures
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (m_samplers_dirty || m_textures_dirty[i])
{
if (!fs_sampler_state[i])
fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled())
{
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
VkCompareOp depth_compare = fs_sampler_state[i]->is_depth_texture ? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER;
bool replace = !fs_sampler_handles[i];
VkFilter min_filter;
VkSamplerMipmapMode mip_mode;
f32 min_lod = 0.f, max_lod = 0.f;
f32 lod_bias = 0.f;
const f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
const auto unnormalized_coords = !!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN);
const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
{
min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8);
max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8);
lod_bias = rsx::method_registers.fragment_textures[i].bias();
}
else
{
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
if (fs_sampler_handles[i])
{
if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod,
min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare))
{
m_current_frame->samplers_to_clean.push_back(std::move(fs_sampler_handles[i]));
replace = true;
}
}
if (replace)
{
fs_sampler_handles[i] = std::make_unique<vk::sampler>(*m_device, wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod,
min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare);
}
}
else
{
*sampler_state = {};
}
m_textures_dirty[i] = false;
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
int texture_index = i + rsx::limits::fragment_textures_count;
if (m_samplers_dirty || m_vertex_textures_dirty[i])
{
if (!vs_sampler_state[i])
vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
if (rsx::method_registers.vertex_textures[i].enabled())
{
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
bool replace = !vs_sampler_handles[i];
const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
const auto min_lod = (f32)rsx::method_registers.vertex_textures[i].min_lod();
const auto max_lod = (f32)rsx::method_registers.vertex_textures[i].max_lod();
const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color());
if (vs_sampler_handles[i])
{
if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color))
{
m_current_frame->samplers_to_clean.push_back(std::move(vs_sampler_handles[i]));
replace = true;
}
}
if (replace)
{
vs_sampler_handles[i] = std::make_unique<vk::sampler>(
*m_device,
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
unnormalized_coords,
0.f, 1.f, min_lod, max_lod,
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color);
}
}
else
*sampler_state = {};
m_vertex_textures_dirty[i] = false;
}
}
m_samplers_dirty.store(false);
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//Load program
std::chrono::time_point<steady_clock> program_start = vertex_end;
std::chrono::time_point<steady_clock> program_start = textures_end;
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
m_program->bind_uniform(m_persistent_attribute_storage, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(m_volatile_attribute_storage, "volatile_input_stream", m_current_frame->descriptor_set);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//Close current pass to avoid conflict with texture functions
close_render_pass();
if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface)
@ -1079,7 +1235,7 @@ void VKGSRender::end()
}
}
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
textures_start = steady_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
@ -1091,48 +1247,15 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
if (!texture0)
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (!sampler_state->image_handle)
{
LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
VkBool32 is_depth_texture = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8);
VkCompareOp depth_compare = is_depth_texture? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true): VK_COMPARE_OP_NEVER;
VkFilter min_filter;
VkSamplerMipmapMode mip_mode;
float min_lod = 0.f, max_lod = 0.f;
float lod_bias = 0.f;
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
{
min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8);
max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8);
lod_bias = rsx::method_registers.fragment_textures[i].bias();
}
else
{
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()),
!!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN),
lod_bias, af_level, min_lod, max_lod,
min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()),
is_depth_texture, depth_compare));
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
m_program->bind_uniform({ fs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
@ -1146,28 +1269,19 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
if (!texture0)
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
if (!sampler_state->image_handle)
{
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
!!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN),
0.f, 1.f, (f32)rsx::method_registers.vertex_textures[i].min_lod(), (f32)rsx::method_registers.vertex_textures[i].max_lod(),
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color())
));
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
m_program->bind_uniform({ vs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
textures_end = steady_clock::now();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
@ -1828,36 +1942,13 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
bool VKGSRender::check_program_status()
{
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
return (rsx::method_registers.shader_program_address() != 0);
}
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
const bool dirty_framebuffer = (surface != nullptr && !m_texture_cache.test_framebuffer(texaddr));
if (dirty_framebuffer || !surface)
{
if (is_depth && m_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
return std::make_tuple(true, 0);
if (dirty_framebuffer)
return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);
surface = rsc.surface;
}
return std::make_tuple(true, surface->native_pitch);
};
get_current_fragment_program(rtt_lookup_func);
if (!current_fragment_program.valid) return false;
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
get_current_vertex_program();
@ -2033,7 +2124,7 @@ bool VKGSRender::check_program_status()
if (update_stencil_info_front)
{
VkStencilFaceFlags face_flag = (update_stencil_info_back)? VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK;
VkStencilFaceFlags face_flag = (update_stencil_info_back) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK;
vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
@ -2053,14 +2144,6 @@ bool VKGSRender::check_program_status()
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
return true;
}
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
{
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;
@ -2733,13 +2816,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
m_current_command_buffer->begin();
return result;
}
m_samplers_dirty.store(true);
void VKGSRender::notify_tile_unbound(u32 tile)
{
//TODO: Handle texture writeback
//u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
//on_notify_memory_unmapped(addr, tiles[tile].size);
//m_rtts.invalidate_surface_address(addr, false);
return result;
}

View File

@ -129,6 +129,16 @@ private:
std::unique_ptr<vk::text_writer> m_text_writer;
std::mutex m_sampler_mutex;
std::atomic_bool m_samplers_dirty = { true };
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
std::array<std::unique_ptr<vk::sampler>, rsx::limits::fragment_textures_count> fs_sampler_handles;
std::array<std::unique_ptr<vk::sampler>, rsx::limits::vertex_textures_count> vs_sampler_handles;
VkBufferView m_persistent_attribute_storage;
VkBufferView m_volatile_attribute_storage;
public:
//vk::fbo draw_fbo;
std::unique_ptr<vk::vertex_cache> m_vertex_cache;

View File

@ -576,7 +576,7 @@ namespace vk
VkSamplerCreateInfo info = {};
sampler(VkDevice dev, VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w,
bool unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color,
VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER)
: m_device(dev)
@ -607,6 +607,21 @@ namespace vk
vkDestroySampler(m_device, value, nullptr);
}
bool matches(VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w,
VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color,
VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER)
{
if (info.magFilter != mag_filter || info.minFilter != min_filter || info.mipmapMode != mipmap_mode ||
info.addressModeU != clamp_u || info.addressModeV != clamp_v || info.addressModeW != clamp_w ||
info.compareEnable != depth_compare || info.unnormalizedCoordinates != unnormalized_coordinates ||
info.mipLodBias != mipLodBias || info.maxAnisotropy != max_anisotropy || info.maxLod != max_lod ||
info.minLod != min_lod || info.compareOp != depth_compare_mode || info.borderColor != border_color)
return false;
return true;
}
sampler(const sampler&) = delete;
sampler(sampler&&) = delete;
private:

View File

@ -23,7 +23,7 @@ namespace vk
if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (render_pass != other.render_pass)
if (render_pass_location != other.render_pass_location)
return false;
if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))

View File

@ -47,10 +47,17 @@ namespace vk
this->depth = depth;
this->mipmaps = mipmaps;
uploaded_image_view.reset(view);
vram_texture = image;
if (managed)
{
managed_texture.reset(image);
uploaded_image_view.reset(view);
}
else
{
verify(HERE), uploaded_image_view.get() == nullptr;
}
if (managed) managed_texture.reset(image);
vram_texture = image;
//TODO: Properly compute these values
if (rsx_pitch > 0)
@ -157,15 +164,27 @@ namespace vk
const u16 internal_width = std::min(width, rsx::apply_resolution_scale(width, true));
const u16 internal_height = std::min(height, rsx::apply_resolution_scale(height, true));
VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
switch (vram_texture->info.format)
{
case VK_FORMAT_D16_UNORM:
aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT;
break;
case VK_FORMAT_D24_UNORM_S8_UINT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
break;
}
VkBufferImageCopy copyRegion = {};
copyRegion.bufferOffset = 0;
copyRegion.bufferRowLength = internal_width;
copyRegion.bufferImageHeight = internal_height;
copyRegion.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copyRegion.imageSubresource = {aspect_flag, 0, 0, 1};
copyRegion.imageOffset = {};
copyRegion.imageExtent = {internal_width, internal_height, 1};
VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VkImageSubresourceRange subresource_range = { aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 };
VkImageLayout layout = vram_texture->current_layout;
change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
@ -246,6 +265,10 @@ namespace vk
bool swap_bytes = false;
switch (vram_texture->info.format)
{
case VK_FORMAT_D32_SFLOAT_S8_UINT:
case VK_FORMAT_D24_UNORM_S8_UINT:
//TODO: Hardware tests to determine correct memory layout
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32_SFLOAT:
@ -312,6 +335,19 @@ namespace vk
return vram_texture->info.format == tex->info.format;
}
bool is_depth_texture() const
{
switch (vram_texture->info.format)
{
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
case VK_FORMAT_D24_UNORM_S8_UINT:
return true;
default:
return false;
}
}
u64 get_sync_timestamp() const
{
return sync_timestamp;
@ -453,7 +489,11 @@ namespace vk
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
vk::change_image_layout(cmd, source, old_src_layout, subresource_range);
const u32 resource_memory = w * h * 4; //Rough approximate
m_discardable_storage.push_back({ image, view });
m_discardable_storage.back().block_size = resource_memory;
m_discarded_memory_size += resource_memory;
return m_discardable_storage.back().view.get();
}
@ -733,7 +773,7 @@ namespace vk
}
template<typename RsxTextureType>
image_view* _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
{
return upload_texture(cmd, tex, m_rtts, *m_device, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
}

View File

@ -237,7 +237,8 @@ VKGSRender::upload_vertex_data()
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;
VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE;
m_persistent_attribute_storage = VK_NULL_HANDLE;
m_volatile_attribute_storage = VK_NULL_HANDLE;
if (required.first > 0)
{
@ -277,11 +278,11 @@ VKGSRender::upload_vertex_data()
}
}
persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
m_persistent_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
}
else
{
persistent_view = null_buffer_view->value;
m_persistent_attribute_storage = null_buffer_view->value;
}
if (required.second > 0)
@ -290,16 +291,13 @@ VKGSRender::upload_vertex_data()
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
m_volatile_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
}
else
{
volatile_view = null_buffer_view->value;
m_volatile_attribute_storage = null_buffer_view->value;
}
m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set);
//Write all the data once if possible
if (required.first && required.second && volatile_offset > persistent_offset)
{

View File

@ -469,6 +469,15 @@ namespace rsx
rsx->m_textures_dirty[index] = true;
}
};
template<u32 index>
struct set_vertex_texture_dirty_bit
{
static void impl(thread* rsx, u32 _reg, u32 arg)
{
rsx->m_vertex_textures_dirty[index] = true;
}
};
}
namespace nv308a
@ -1539,6 +1548,14 @@ namespace rsx
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_OFFSET, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_FORMAT, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_ADDRESS, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_CONTROL0, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_CONTROL3, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_FILTER, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_BORDER_COLOR, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind<NV4097_SET_RENDER_ENABLE, nv4097::set_render_mode>();
bind<NV4097_SET_ZCULL_EN, nv4097::set_zcull_render_enable>();
bind<NV4097_SET_ZCULL_STATS_ENABLE, nv4097::set_zcull_stats_enable>();