mirror of https://github.com/RPCS3/rpcs3.git
rsx: Optimizations
- Reimplement fragment program fetch and rewrite texture upload mechanism -- All of these steps should only be done at most once per draw call -- Eliminates continously checking the surface store for overlapping addresses as well addenda - critical fixes - gl: Bind TIU before starting texture operations as they will affect the currently bound texture - vk: Reuse sampler objects if possible - rsx: Support for depth resampling for depth textures obtained via blit engine vk/rsx: Minor fixes - Fix accidental imageview dereference when using WCB if texture memory occupies FB memory - Invalidate dirty framebuffers (strict mode only) - Normalize line endings because VS is dumb
This commit is contained in:
parent
865bb47462
commit
173d05b54f
|
@ -522,6 +522,12 @@ namespace rsx
|
|||
*/
|
||||
void invalidate_surface_address(u32 addr, bool depth)
|
||||
{
|
||||
if (address_is_bound(addr, depth))
|
||||
{
|
||||
LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!depth)
|
||||
{
|
||||
auto It = m_render_targets_storage.find(addr);
|
||||
|
|
|
@ -25,6 +25,14 @@ namespace rsx
|
|||
framebuffer_storage = 3
|
||||
};
|
||||
|
||||
//Sampled image descriptor
|
||||
struct sampled_image_descriptor_base
|
||||
{
|
||||
texture_upload_context upload_context = texture_upload_context::shader_read;
|
||||
bool is_depth_texture = false;
|
||||
f32 internal_scale = 1.f;
|
||||
};
|
||||
|
||||
struct cached_texture_section : public rsx::buffered_section
|
||||
{
|
||||
u16 width;
|
||||
|
@ -204,6 +212,22 @@ namespace rsx
|
|||
u32 address_range = 0;
|
||||
};
|
||||
|
||||
struct sampled_image_descriptor : public sampled_image_descriptor_base
|
||||
{
|
||||
image_view_type image_handle = 0;
|
||||
|
||||
sampled_image_descriptor()
|
||||
{}
|
||||
|
||||
sampled_image_descriptor(image_view_type handle, const texture_upload_context ctx, const bool is_depth, const f32 scale)
|
||||
{
|
||||
image_handle = handle;
|
||||
upload_context = ctx;
|
||||
is_depth_texture = is_depth;
|
||||
internal_scale = scale;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
//Internal implementation methods and helpers
|
||||
|
||||
|
@ -567,6 +591,15 @@ namespace rsx
|
|||
region.set_dirty(false);
|
||||
no_access_range = region.get_min_max(no_access_range);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (region.get_context() != texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
//This space was being used for other purposes other than framebuffer storage
|
||||
//Delete used resources before attaching it to framebuffer memory
|
||||
free_texture_section(region);
|
||||
}
|
||||
}
|
||||
|
||||
region.protect(utils::protection::no);
|
||||
region.create(width, height, 1, 1, nullptr, image, pitch, false, std::forward<Args>(extras)...);
|
||||
|
@ -831,7 +864,7 @@ namespace rsx
|
|||
}
|
||||
|
||||
template <typename RsxTextureType, typename surface_store_type, typename ...Args>
|
||||
image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras)
|
||||
sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras)
|
||||
{
|
||||
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
|
||||
const u32 tex_size = (u32)get_texture_size(tex);
|
||||
|
@ -841,7 +874,7 @@ namespace rsx
|
|||
if (!texaddr || !tex_size)
|
||||
{
|
||||
LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, tex_size);
|
||||
return 0;
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto extended_dimension = tex.get_extended_texture_dimension();
|
||||
|
@ -857,6 +890,7 @@ namespace rsx
|
|||
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
LOG_ERROR(RSX, "Texture resides in render target memory, but requested type is not 2D (%d)", (u32)extended_dimension);
|
||||
|
||||
f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch();
|
||||
for (const auto& tex : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
if (std::get<0>(tex) == texaddr)
|
||||
|
@ -864,7 +898,7 @@ namespace rsx
|
|||
if (g_cfg.video.strict_rendering_mode)
|
||||
{
|
||||
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
|
||||
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
|
||||
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, false, internal_scale };
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -875,8 +909,10 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
return texptr->get_view();
|
||||
return{ texptr->get_view(), texture_upload_context::framebuffer_storage, false, internal_scale };
|
||||
}
|
||||
else
|
||||
m_rtts.invalidate_surface_address(texaddr, false);
|
||||
}
|
||||
|
||||
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
|
||||
|
@ -886,12 +922,13 @@ namespace rsx
|
|||
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
LOG_ERROR(RSX, "Texture resides in depth buffer memory, but requested type is not 2D (%d)", (u32)extended_dimension);
|
||||
|
||||
f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch();
|
||||
if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil))
|
||||
{
|
||||
if (g_cfg.video.strict_rendering_mode)
|
||||
{
|
||||
LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr);
|
||||
return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height());
|
||||
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, true, internal_scale };
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -900,8 +937,10 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
return texptr->get_view();
|
||||
return{ texptr->get_view(), texture_upload_context::framebuffer_storage, true, internal_scale };
|
||||
}
|
||||
else
|
||||
m_rtts.invalidate_surface_address(texaddr, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -945,10 +984,14 @@ namespace rsx
|
|||
const u32 internal_width = (const u32)(tex_width * internal_scale);
|
||||
|
||||
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true);
|
||||
if (rsc.surface/* && test_framebuffer(texaddr)*/)
|
||||
if (rsc.surface)
|
||||
{
|
||||
//TODO: Check that this region is not cpu-dirty before doing a copy
|
||||
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
if (!test_framebuffer(texaddr))
|
||||
{
|
||||
m_rtts.invalidate_surface_address(texaddr, rsc.is_depth_surface);
|
||||
}
|
||||
else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d)
|
||||
{
|
||||
LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d",
|
||||
texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height());
|
||||
|
@ -965,16 +1008,16 @@ namespace rsx
|
|||
insert_texture_barrier();
|
||||
}
|
||||
|
||||
return rsc.surface->get_view();
|
||||
return{ rsc.surface->get_view(), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
|
||||
}
|
||||
else return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
|
||||
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true));
|
||||
else return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
|
||||
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
|
||||
return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
|
||||
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true));
|
||||
return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
|
||||
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -987,7 +1030,7 @@ namespace rsx
|
|||
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height, depth);
|
||||
if (cached_texture)
|
||||
{
|
||||
return cached_texture->get_raw_view();
|
||||
return{ cached_texture->get_raw_view(), cached_texture->get_context(), cached_texture->is_depth_texture(), 1.f };
|
||||
}
|
||||
|
||||
if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format))
|
||||
|
@ -1030,7 +1073,7 @@ namespace rsx
|
|||
|
||||
auto src_image = surface->get_raw_texture();
|
||||
if (auto result = create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height))
|
||||
return result;
|
||||
return{ result, texture_upload_context::blit_engine_dst, surface->is_depth_texture(), 1.f };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1048,8 +1091,9 @@ namespace rsx
|
|||
invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward<Args>(extras)...);
|
||||
|
||||
m_texture_memory_in_use += (tex_pitch * tex_height);
|
||||
return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
|
||||
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
|
||||
return{ upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
|
||||
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(),
|
||||
texture_upload_context::shader_read, false, 1.f };
|
||||
}
|
||||
|
||||
template <typename surface_store_type, typename blitter_type, typename ...Args>
|
||||
|
|
|
@ -54,7 +54,7 @@ void D3D12GSRender::load_program()
|
|||
};
|
||||
|
||||
get_current_vertex_program();
|
||||
get_current_fragment_program(rtt_lookup_func);
|
||||
get_current_fragment_program_legacy(rtt_lookup_func);
|
||||
|
||||
if (!current_fragment_program.valid)
|
||||
return;
|
||||
|
|
|
@ -347,6 +347,72 @@ void GLGSRender::end()
|
|||
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
|
||||
std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();
|
||||
|
||||
//Load textures
|
||||
{
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
void* unused = nullptr;
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (m_samplers_dirty || m_textures_dirty[i])
|
||||
{
|
||||
if (!fs_sampler_state[i])
|
||||
fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled())
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
|
||||
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
|
||||
|
||||
GLenum target = get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]);
|
||||
glBindTexture(target, sampler_state->image_handle);
|
||||
}
|
||||
else
|
||||
{
|
||||
*sampler_state = {};
|
||||
}
|
||||
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
int texture_index = i + rsx::limits::fragment_textures_count;
|
||||
|
||||
if (m_samplers_dirty || m_vertex_textures_dirty[i])
|
||||
{
|
||||
if (!vs_sampler_state[i])
|
||||
vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0 + texture_index);
|
||||
|
||||
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
glBindTexture(GL_TEXTURE_2D, static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get())->image_handle);
|
||||
}
|
||||
else
|
||||
*sampler_state = {};
|
||||
|
||||
m_vertex_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
m_samplers_dirty.store(false);
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
||||
//Load program here since it is dependent on vertex state
|
||||
|
||||
|
@ -462,43 +528,6 @@ void GLGSRender::end()
|
|||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
|
||||
//Setup textures
|
||||
//Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
int location;
|
||||
if (rsx::method_registers.fragment_textures[i].enabled() && m_program->uniforms.has_location("tex" + std::to_string(i), &location))
|
||||
{
|
||||
m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
if (m_textures_dirty[i])
|
||||
{
|
||||
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Vertex textures
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
int texture_index = i + rsx::limits::fragment_textures_count;
|
||||
int location;
|
||||
|
||||
if (!rsx::method_registers.vertex_textures[i].enabled())
|
||||
continue;
|
||||
|
||||
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
|
||||
{
|
||||
m_gl_texture_cache.upload_and_bind_texture(texture_index, GL_TEXTURE_2D, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
|
||||
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
||||
|
||||
if (g_cfg.video.debug_output)
|
||||
|
@ -952,44 +981,16 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
|
|||
|
||||
bool GLGSRender::check_program_state()
|
||||
{
|
||||
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
|
||||
{
|
||||
gl::render_target *surface = nullptr;
|
||||
if (!is_depth)
|
||||
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
|
||||
else
|
||||
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
|
||||
|
||||
const bool dirty_framebuffer = (surface != nullptr && !m_gl_texture_cache.test_framebuffer(texaddr));
|
||||
if (dirty_framebuffer || !surface)
|
||||
{
|
||||
if (is_depth && m_gl_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
|
||||
return std::make_tuple(true, 0);
|
||||
|
||||
if (dirty_framebuffer)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
surface = rsc.surface;
|
||||
}
|
||||
|
||||
return std::make_tuple(true, surface->get_native_pitch());
|
||||
};
|
||||
|
||||
get_current_fragment_program(rtt_lookup_func);
|
||||
|
||||
if (current_fragment_program.valid == false)
|
||||
return false;
|
||||
|
||||
get_current_vertex_program();
|
||||
return true;
|
||||
return (rsx::method_registers.shader_program_address() != 0);
|
||||
}
|
||||
|
||||
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
|
||||
get_current_vertex_program();
|
||||
|
||||
auto &fragment_program = current_fragment_program;
|
||||
auto &vertex_program = current_vertex_program;
|
||||
|
||||
|
@ -1061,7 +1062,7 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
|
|||
}
|
||||
|
||||
m_transform_constants_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::flip(int buffer)
|
||||
{
|
||||
|
@ -1228,6 +1229,11 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
if (!result.violation_handled)
|
||||
return false;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
m_samplers_dirty.store(true);
|
||||
}
|
||||
|
||||
if (result.num_flushable > 0)
|
||||
{
|
||||
work_item &task = post_flush_request(address, result);
|
||||
|
@ -1249,7 +1255,13 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
|
|||
{
|
||||
//Discard all memory in that range without bothering with writeback (Force it for strict?)
|
||||
if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled)
|
||||
{
|
||||
m_gl_texture_cache.purge_dirty();
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
m_samplers_dirty.store(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::do_local_task()
|
||||
|
@ -1296,6 +1308,7 @@ void GLGSRender::synchronize_buffers()
|
|||
|
||||
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
|
||||
{
|
||||
m_samplers_dirty.store(true);
|
||||
return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts);
|
||||
}
|
||||
|
||||
|
|
|
@ -310,33 +310,6 @@ struct driver_state
|
|||
}
|
||||
};
|
||||
|
||||
struct sw_ring_buffer
|
||||
{
|
||||
std::vector<u8> data;
|
||||
u32 ring_pos = 0;
|
||||
u32 ring_length = 0;
|
||||
|
||||
sw_ring_buffer(u32 size)
|
||||
{
|
||||
data.resize(size);
|
||||
ring_length = size;
|
||||
}
|
||||
|
||||
void* get(u32 dwords)
|
||||
{
|
||||
const u32 required = (dwords << 2);
|
||||
if ((ring_pos + required) > ring_length)
|
||||
{
|
||||
ring_pos = 0;
|
||||
return data.data();
|
||||
}
|
||||
|
||||
void *result = data.data() + ring_pos;
|
||||
ring_pos += required;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class GLGSRender : public GSRender
|
||||
{
|
||||
private:
|
||||
|
@ -405,6 +378,11 @@ private:
|
|||
const u32 occlusion_query_count = 128;
|
||||
std::array<occlusion_query_info, 128> occlusion_query_data = {};
|
||||
|
||||
std::mutex m_sampler_mutex;
|
||||
std::atomic_bool m_samplers_dirty = {true};
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
|
||||
|
||||
public:
|
||||
GLGSRender();
|
||||
|
||||
|
|
|
@ -783,15 +783,5 @@ namespace gl
|
|||
void* unused = nullptr;
|
||||
return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
|
||||
}
|
||||
|
||||
template<typename RsxTextureType>
|
||||
void upload_and_bind_texture(int index, GLenum target, RsxTextureType &tex, gl_render_targets &m_rtts)
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0 + index);
|
||||
void* unused = nullptr;
|
||||
|
||||
auto id = upload_texture(unused, tex, m_rtts);
|
||||
glBindTexture(target, id);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "Emu/Cell/PPUCallback.h"
|
||||
|
||||
#include "Common/BufferUtils.h"
|
||||
#include "Common/texture_cache.h"
|
||||
#include "rsx_methods.h"
|
||||
#include "rsx_utils.h"
|
||||
|
||||
|
@ -243,6 +244,7 @@ namespace rsx
|
|||
};
|
||||
m_rtts_dirty = true;
|
||||
memset(m_textures_dirty, -1, sizeof(m_textures_dirty));
|
||||
memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty));
|
||||
m_transform_constants_dirty = true;
|
||||
}
|
||||
|
||||
|
@ -1321,7 +1323,108 @@ namespace rsx
|
|||
return result;
|
||||
}
|
||||
|
||||
void thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info)
|
||||
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
|
||||
{
|
||||
auto &result = current_fragment_program = {};
|
||||
|
||||
const u32 shader_program = rsx::method_registers.shader_program_address();
|
||||
if (shader_program == 0)
|
||||
return;
|
||||
|
||||
const u32 program_location = (shader_program & 0x3) - 1;
|
||||
const u32 program_offset = (shader_program & ~0x3);
|
||||
|
||||
result.offset = program_offset;
|
||||
result.addr = vm::base(rsx::get_address(program_offset, program_location));
|
||||
result.valid = true;
|
||||
result.ctrl = rsx::method_registers.shader_control();
|
||||
result.unnormalized_coords = 0;
|
||||
result.front_back_color_enabled = !rsx::method_registers.two_side_light_en();
|
||||
result.back_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKDIFFUSE);
|
||||
result.back_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKSPECULAR);
|
||||
result.front_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE);
|
||||
result.front_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR);
|
||||
result.redirected_textures = 0;
|
||||
result.shadow_textures = 0;
|
||||
|
||||
std::array<texture_dimension_extended, 16> texture_dimensions;
|
||||
const auto resolution_scale = rsx::get_resolution_scale();
|
||||
|
||||
for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
auto &tex = rsx::method_registers.fragment_textures[i];
|
||||
result.texture_scale[i][0] = 1.f;
|
||||
result.texture_scale[i][1] = 1.f;
|
||||
result.textures_alpha_kill[i] = 0;
|
||||
result.textures_zfunc[i] = 0;
|
||||
|
||||
if (!tex.enabled())
|
||||
{
|
||||
texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d;
|
||||
}
|
||||
else
|
||||
{
|
||||
texture_dimensions[i] = tex.get_extended_texture_dimension();
|
||||
|
||||
if (tex.alpha_kill_enabled())
|
||||
{
|
||||
//alphakill can be ignored unless a valid comparison function is set
|
||||
const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc();
|
||||
if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
|
||||
{
|
||||
result.textures_alpha_kill[i] = 1;
|
||||
result.textures_zfunc[i] = (u8)func;
|
||||
}
|
||||
}
|
||||
|
||||
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
|
||||
const u32 raw_format = tex.format();
|
||||
|
||||
if (raw_format & CELL_GCM_TEXTURE_UN)
|
||||
result.unnormalized_coords |= (1 << i);
|
||||
|
||||
if (sampler_descriptors[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
if (raw_format & CELL_GCM_TEXTURE_UN)
|
||||
{
|
||||
result.texture_scale[i][0] = (resolution_scale * sampler_descriptors[i]->internal_scale);
|
||||
result.texture_scale[i][1] = resolution_scale;
|
||||
}
|
||||
}
|
||||
|
||||
if (sampler_descriptors[i]->is_depth_texture)
|
||||
{
|
||||
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
|
||||
switch (format)
|
||||
{
|
||||
case CELL_GCM_TEXTURE_A8R8G8B8:
|
||||
case CELL_GCM_TEXTURE_D8R8G8B8:
|
||||
case CELL_GCM_TEXTURE_A4R4G4B4:
|
||||
case CELL_GCM_TEXTURE_R5G6B5:
|
||||
result.redirected_textures |= (1 << i);
|
||||
break;
|
||||
case CELL_GCM_TEXTURE_DEPTH16:
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8:
|
||||
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
|
||||
{
|
||||
const auto compare_mode = (rsx::comparison_function)tex.zfunc();
|
||||
if (result.textures_alpha_kill[i] == 0 &&
|
||||
compare_mode < rsx::comparison_function::always &&
|
||||
compare_mode > rsx::comparison_function::never)
|
||||
result.shadow_textures |= (1 << i);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.set_texture_dimension(texture_dimensions);
|
||||
}
|
||||
|
||||
void thread::get_current_fragment_program_legacy(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info)
|
||||
{
|
||||
auto &result = current_fragment_program = {};
|
||||
|
||||
|
@ -1412,8 +1515,8 @@ namespace rsx
|
|||
case CELL_GCM_TEXTURE_D8R8G8B8:
|
||||
case CELL_GCM_TEXTURE_A4R4G4B4:
|
||||
case CELL_GCM_TEXTURE_R5G6B5:
|
||||
result.redirected_textures |= (1 << i);
|
||||
break;
|
||||
result.redirected_textures |= (1 << i);
|
||||
break;
|
||||
case CELL_GCM_TEXTURE_DEPTH16:
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8:
|
||||
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
|
||||
|
@ -1426,7 +1529,7 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
|
||||
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -135,6 +135,8 @@ namespace rsx
|
|||
std::array<attribute_buffer_placement, 16> attribute_placement;
|
||||
};
|
||||
|
||||
struct sampled_image_descriptor_base;
|
||||
|
||||
class thread : public named_thread
|
||||
{
|
||||
std::shared_ptr<thread_ctrl> m_vblank_thread;
|
||||
|
@ -188,6 +190,7 @@ namespace rsx
|
|||
bool m_rtts_dirty;
|
||||
bool m_transform_constants_dirty;
|
||||
bool m_textures_dirty[16];
|
||||
bool m_vertex_textures_dirty[4];
|
||||
|
||||
protected:
|
||||
std::array<u32, 4> get_color_surface_addresses() const;
|
||||
|
@ -208,7 +211,9 @@ namespace rsx
|
|||
* get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth
|
||||
* returns whether surface is a render target and surface pitch in native format
|
||||
*/
|
||||
void get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info);
|
||||
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
|
||||
void get_current_fragment_program_legacy(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info);
|
||||
|
||||
public:
|
||||
double fps_limit = 59.94;
|
||||
|
||||
|
|
|
@ -712,6 +712,14 @@ VKGSRender::~VKGSRender()
|
|||
m_rtts.destroy();
|
||||
m_texture_cache.destroy();
|
||||
|
||||
//Sampler handles
|
||||
for (auto& handle : fs_sampler_handles)
|
||||
handle.reset();
|
||||
|
||||
for (auto& handle : vs_sampler_handles)
|
||||
handle.reset();
|
||||
|
||||
//Overlay text handler
|
||||
m_text_writer.reset();
|
||||
|
||||
//Pipeline descriptors
|
||||
|
@ -750,6 +758,11 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
if (!result.violation_handled)
|
||||
return false;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
m_samplers_dirty.store(true);
|
||||
}
|
||||
|
||||
if (result.num_flushable > 0)
|
||||
{
|
||||
const bool is_rsxthr = std::this_thread::get_id() == rsx_thread;
|
||||
|
@ -845,6 +858,23 @@ void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
|
|||
*m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()).violation_handled)
|
||||
{
|
||||
m_texture_cache.purge_dirty();
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
m_samplers_dirty.store(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VKGSRender::notify_tile_unbound(u32 tile)
|
||||
{
|
||||
//TODO: Handle texture writeback
|
||||
//u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
|
||||
//on_notify_memory_unmapped(addr, tiles[tile].size);
|
||||
//m_rtts.invalidate_surface_address(addr, false);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
m_samplers_dirty.store(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -995,8 +1025,6 @@ void VKGSRender::end()
|
|||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||
|
||||
//Load program here since it is dependent on vertex state
|
||||
if (!check_program_status())
|
||||
{
|
||||
|
@ -1005,24 +1033,152 @@ void VKGSRender::end()
|
|||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||
//Close current pass to avoid conflict with texture functions
|
||||
close_render_pass();
|
||||
|
||||
//Programs data is dependent on vertex state
|
||||
std::chrono::time_point<steady_clock> vertex_start = state_check_end;
|
||||
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
||||
auto upload_info = upload_vertex_data();
|
||||
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_start = vertex_end;
|
||||
//Load textures
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_sampler_mutex);
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (m_samplers_dirty || m_textures_dirty[i])
|
||||
{
|
||||
if (!fs_sampler_state[i])
|
||||
fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled())
|
||||
{
|
||||
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
|
||||
VkCompareOp depth_compare = fs_sampler_state[i]->is_depth_texture ? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER;
|
||||
|
||||
bool replace = !fs_sampler_handles[i];
|
||||
VkFilter min_filter;
|
||||
VkSamplerMipmapMode mip_mode;
|
||||
f32 min_lod = 0.f, max_lod = 0.f;
|
||||
f32 lod_bias = 0.f;
|
||||
|
||||
const f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
|
||||
const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
|
||||
const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
|
||||
const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
|
||||
const auto unnormalized_coords = !!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN);
|
||||
const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
|
||||
const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());
|
||||
|
||||
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
|
||||
{
|
||||
min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8);
|
||||
max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8);
|
||||
lod_bias = rsx::method_registers.fragment_textures[i].bias();
|
||||
}
|
||||
else
|
||||
{
|
||||
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
}
|
||||
|
||||
if (fs_sampler_handles[i])
|
||||
{
|
||||
if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod,
|
||||
min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare))
|
||||
{
|
||||
m_current_frame->samplers_to_clean.push_back(std::move(fs_sampler_handles[i]));
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (replace)
|
||||
{
|
||||
fs_sampler_handles[i] = std::make_unique<vk::sampler>(*m_device, wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod,
|
||||
min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*sampler_state = {};
|
||||
}
|
||||
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
int texture_index = i + rsx::limits::fragment_textures_count;
|
||||
|
||||
if (m_samplers_dirty || m_vertex_textures_dirty[i])
|
||||
{
|
||||
if (!vs_sampler_state[i])
|
||||
vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
|
||||
bool replace = !vs_sampler_handles[i];
|
||||
const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
|
||||
const auto min_lod = (f32)rsx::method_registers.vertex_textures[i].min_lod();
|
||||
const auto max_lod = (f32)rsx::method_registers.vertex_textures[i].max_lod();
|
||||
const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color());
|
||||
|
||||
if (vs_sampler_handles[i])
|
||||
{
|
||||
if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color))
|
||||
{
|
||||
m_current_frame->samplers_to_clean.push_back(std::move(vs_sampler_handles[i]));
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (replace)
|
||||
{
|
||||
vs_sampler_handles[i] = std::make_unique<vk::sampler>(
|
||||
*m_device,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
unnormalized_coords,
|
||||
0.f, 1.f, min_lod, max_lod,
|
||||
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color);
|
||||
}
|
||||
}
|
||||
else
|
||||
*sampler_state = {};
|
||||
|
||||
m_vertex_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
m_samplers_dirty.store(false);
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
|
||||
//Load program
|
||||
std::chrono::time_point<steady_clock> program_start = vertex_end;
|
||||
std::chrono::time_point<steady_clock> program_start = textures_end;
|
||||
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
|
||||
|
||||
m_program->bind_uniform(m_persistent_attribute_storage, "persistent_input_stream", m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform(m_volatile_attribute_storage, "volatile_input_stream", m_current_frame->descriptor_set);
|
||||
|
||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||
|
||||
//Close current pass to avoid conflict with texture functions
|
||||
close_render_pass();
|
||||
|
||||
if (g_cfg.video.strict_rendering_mode)
|
||||
{
|
||||
auto copy_rtt_contents = [&](vk::render_target* surface)
|
||||
|
@ -1079,7 +1235,7 @@ void VKGSRender::end()
|
|||
}
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
textures_start = steady_clock::now();
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
|
@ -1091,48 +1247,15 @@ void VKGSRender::end()
|
|||
continue;
|
||||
}
|
||||
|
||||
vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
if (!texture0)
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
if (!sampler_state->image_handle)
|
||||
{
|
||||
LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i);
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
|
||||
|
||||
VkBool32 is_depth_texture = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8);
|
||||
VkCompareOp depth_compare = is_depth_texture? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true): VK_COMPARE_OP_NEVER;
|
||||
|
||||
VkFilter min_filter;
|
||||
VkSamplerMipmapMode mip_mode;
|
||||
float min_lod = 0.f, max_lod = 0.f;
|
||||
float lod_bias = 0.f;
|
||||
|
||||
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
|
||||
{
|
||||
min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8);
|
||||
max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8);
|
||||
lod_bias = rsx::method_registers.fragment_textures[i].bias();
|
||||
}
|
||||
else
|
||||
{
|
||||
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
}
|
||||
|
||||
f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
|
||||
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
|
||||
*m_device,
|
||||
vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()),
|
||||
!!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN),
|
||||
lod_bias, af_level, min_lod, max_lod,
|
||||
min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()),
|
||||
is_depth_texture, depth_compare));
|
||||
|
||||
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform({ fs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1146,28 +1269,19 @@ void VKGSRender::end()
|
|||
continue;
|
||||
}
|
||||
|
||||
vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
|
||||
if (!texture0)
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
if (!sampler_state->image_handle)
|
||||
{
|
||||
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
continue;
|
||||
}
|
||||
|
||||
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
|
||||
*m_device,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
!!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN),
|
||||
0.f, 1.f, (f32)rsx::method_registers.vertex_textures[i].min_lod(), (f32)rsx::method_registers.vertex_textures[i].max_lod(),
|
||||
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color())
|
||||
));
|
||||
|
||||
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform({ vs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
textures_end = steady_clock::now();
|
||||
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
|
||||
//While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
|
||||
|
@ -1828,36 +1942,13 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
|
|||
|
||||
bool VKGSRender::check_program_status()
|
||||
{
|
||||
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
|
||||
{
|
||||
vk::render_target *surface = nullptr;
|
||||
return (rsx::method_registers.shader_program_address() != 0);
|
||||
}
|
||||
|
||||
if (!is_depth)
|
||||
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
|
||||
else
|
||||
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
|
||||
|
||||
const bool dirty_framebuffer = (surface != nullptr && !m_texture_cache.test_framebuffer(texaddr));
|
||||
if (dirty_framebuffer || !surface)
|
||||
{
|
||||
if (is_depth && m_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
|
||||
return std::make_tuple(true, 0);
|
||||
|
||||
if (dirty_framebuffer)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
surface = rsc.surface;
|
||||
}
|
||||
|
||||
return std::make_tuple(true, surface->native_pitch);
|
||||
};
|
||||
|
||||
get_current_fragment_program(rtt_lookup_func);
|
||||
if (!current_fragment_program.valid) return false;
|
||||
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
|
||||
get_current_vertex_program();
|
||||
|
||||
|
@ -2033,7 +2124,7 @@ bool VKGSRender::check_program_status()
|
|||
|
||||
if (update_stencil_info_front)
|
||||
{
|
||||
VkStencilFaceFlags face_flag = (update_stencil_info_back)? VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK;
|
||||
VkStencilFaceFlags face_flag = (update_stencil_info_back) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK;
|
||||
|
||||
vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
|
||||
vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
|
||||
|
@ -2053,14 +2144,6 @@ bool VKGSRender::check_program_status()
|
|||
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
|
||||
{
|
||||
auto &vertex_program = current_vertex_program;
|
||||
auto &fragment_program = current_fragment_program;
|
||||
|
||||
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program);
|
||||
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
|
||||
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;
|
||||
|
@ -2733,13 +2816,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
|
|||
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
|
||||
m_current_command_buffer->begin();
|
||||
|
||||
return result;
|
||||
}
|
||||
m_samplers_dirty.store(true);
|
||||
|
||||
void VKGSRender::notify_tile_unbound(u32 tile)
|
||||
{
|
||||
//TODO: Handle texture writeback
|
||||
//u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
|
||||
//on_notify_memory_unmapped(addr, tiles[tile].size);
|
||||
//m_rtts.invalidate_surface_address(addr, false);
|
||||
return result;
|
||||
}
|
|
@ -129,6 +129,16 @@ private:
|
|||
|
||||
std::unique_ptr<vk::text_writer> m_text_writer;
|
||||
|
||||
std::mutex m_sampler_mutex;
|
||||
std::atomic_bool m_samplers_dirty = { true };
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
|
||||
std::array<std::unique_ptr<vk::sampler>, rsx::limits::fragment_textures_count> fs_sampler_handles;
|
||||
std::array<std::unique_ptr<vk::sampler>, rsx::limits::vertex_textures_count> vs_sampler_handles;
|
||||
|
||||
VkBufferView m_persistent_attribute_storage;
|
||||
VkBufferView m_volatile_attribute_storage;
|
||||
|
||||
public:
|
||||
//vk::fbo draw_fbo;
|
||||
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
|
||||
|
|
|
@ -576,7 +576,7 @@ namespace vk
|
|||
VkSamplerCreateInfo info = {};
|
||||
|
||||
sampler(VkDevice dev, VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w,
|
||||
bool unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
|
||||
VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
|
||||
VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color,
|
||||
VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER)
|
||||
: m_device(dev)
|
||||
|
@ -607,6 +607,21 @@ namespace vk
|
|||
vkDestroySampler(m_device, value, nullptr);
|
||||
}
|
||||
|
||||
bool matches(VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w,
|
||||
VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod,
|
||||
VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color,
|
||||
VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER)
|
||||
{
|
||||
if (info.magFilter != mag_filter || info.minFilter != min_filter || info.mipmapMode != mipmap_mode ||
|
||||
info.addressModeU != clamp_u || info.addressModeV != clamp_v || info.addressModeW != clamp_w ||
|
||||
info.compareEnable != depth_compare || info.unnormalizedCoordinates != unnormalized_coordinates ||
|
||||
info.mipLodBias != mipLodBias || info.maxAnisotropy != max_anisotropy || info.maxLod != max_lod ||
|
||||
info.minLod != min_lod || info.compareOp != depth_compare_mode || info.borderColor != border_color)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
sampler(const sampler&) = delete;
|
||||
sampler(sampler&&) = delete;
|
||||
private:
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace vk
|
|||
if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
|
||||
return false;
|
||||
|
||||
if (render_pass != other.render_pass)
|
||||
if (render_pass_location != other.render_pass_location)
|
||||
return false;
|
||||
|
||||
if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
|
||||
|
|
|
@ -47,10 +47,17 @@ namespace vk
|
|||
this->depth = depth;
|
||||
this->mipmaps = mipmaps;
|
||||
|
||||
uploaded_image_view.reset(view);
|
||||
vram_texture = image;
|
||||
if (managed)
|
||||
{
|
||||
managed_texture.reset(image);
|
||||
uploaded_image_view.reset(view);
|
||||
}
|
||||
else
|
||||
{
|
||||
verify(HERE), uploaded_image_view.get() == nullptr;
|
||||
}
|
||||
|
||||
if (managed) managed_texture.reset(image);
|
||||
vram_texture = image;
|
||||
|
||||
//TODO: Properly compute these values
|
||||
if (rsx_pitch > 0)
|
||||
|
@ -157,15 +164,27 @@ namespace vk
|
|||
const u16 internal_width = std::min(width, rsx::apply_resolution_scale(width, true));
|
||||
const u16 internal_height = std::min(height, rsx::apply_resolution_scale(height, true));
|
||||
|
||||
VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
switch (vram_texture->info.format)
|
||||
{
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
break;
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
break;
|
||||
}
|
||||
|
||||
VkBufferImageCopy copyRegion = {};
|
||||
copyRegion.bufferOffset = 0;
|
||||
copyRegion.bufferRowLength = internal_width;
|
||||
copyRegion.bufferImageHeight = internal_height;
|
||||
copyRegion.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
|
||||
copyRegion.imageSubresource = {aspect_flag, 0, 0, 1};
|
||||
copyRegion.imageOffset = {};
|
||||
copyRegion.imageExtent = {internal_width, internal_height, 1};
|
||||
|
||||
VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
||||
VkImageSubresourceRange subresource_range = { aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 };
|
||||
|
||||
VkImageLayout layout = vram_texture->current_layout;
|
||||
change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
|
||||
|
@ -246,6 +265,10 @@ namespace vk
|
|||
bool swap_bytes = false;
|
||||
switch (vram_texture->info.format)
|
||||
{
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
//TODO: Hardware tests to determine correct memory layout
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
case VK_FORMAT_R16G16B16A16_SFLOAT:
|
||||
case VK_FORMAT_R32G32B32A32_SFLOAT:
|
||||
case VK_FORMAT_R32_SFLOAT:
|
||||
|
@ -312,6 +335,19 @@ namespace vk
|
|||
return vram_texture->info.format == tex->info.format;
|
||||
}
|
||||
|
||||
bool is_depth_texture() const
|
||||
{
|
||||
switch (vram_texture->info.format)
|
||||
{
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u64 get_sync_timestamp() const
|
||||
{
|
||||
return sync_timestamp;
|
||||
|
@ -453,7 +489,11 @@ namespace vk
|
|||
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
|
||||
vk::change_image_layout(cmd, source, old_src_layout, subresource_range);
|
||||
|
||||
const u32 resource_memory = w * h * 4; //Rough approximate
|
||||
m_discardable_storage.push_back({ image, view });
|
||||
m_discardable_storage.back().block_size = resource_memory;
|
||||
m_discarded_memory_size += resource_memory;
|
||||
|
||||
return m_discardable_storage.back().view.get();
|
||||
}
|
||||
|
||||
|
@ -733,7 +773,7 @@ namespace vk
|
|||
}
|
||||
|
||||
template<typename RsxTextureType>
|
||||
image_view* _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
|
||||
sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
|
||||
{
|
||||
return upload_texture(cmd, tex, m_rtts, *m_device, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
|
||||
}
|
||||
|
|
|
@ -237,7 +237,8 @@ VKGSRender::upload_vertex_data()
|
|||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;
|
||||
|
||||
VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE;
|
||||
m_persistent_attribute_storage = VK_NULL_HANDLE;
|
||||
m_volatile_attribute_storage = VK_NULL_HANDLE;
|
||||
|
||||
if (required.first > 0)
|
||||
{
|
||||
|
@ -277,11 +278,11 @@ VKGSRender::upload_vertex_data()
|
|||
}
|
||||
}
|
||||
|
||||
persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
m_persistent_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
}
|
||||
else
|
||||
{
|
||||
persistent_view = null_buffer_view->value;
|
||||
m_persistent_attribute_storage = null_buffer_view->value;
|
||||
}
|
||||
|
||||
if (required.second > 0)
|
||||
|
@ -290,16 +291,13 @@ VKGSRender::upload_vertex_data()
|
|||
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
|
||||
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
|
||||
|
||||
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
m_volatile_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
}
|
||||
else
|
||||
{
|
||||
volatile_view = null_buffer_view->value;
|
||||
m_volatile_attribute_storage = null_buffer_view->value;
|
||||
}
|
||||
|
||||
m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set);
|
||||
|
||||
//Write all the data once if possible
|
||||
if (required.first && required.second && volatile_offset > persistent_offset)
|
||||
{
|
||||
|
|
|
@ -469,6 +469,15 @@ namespace rsx
|
|||
rsx->m_textures_dirty[index] = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<u32 index>
|
||||
struct set_vertex_texture_dirty_bit
|
||||
{
|
||||
static void impl(thread* rsx, u32 _reg, u32 arg)
|
||||
{
|
||||
rsx->m_vertex_textures_dirty[index] = true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace nv308a
|
||||
|
@ -1539,6 +1548,14 @@ namespace rsx
|
|||
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_OFFSET, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_FORMAT, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_ADDRESS, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_CONTROL0, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_CONTROL3, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_FILTER, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind_range<NV4097_SET_VERTEX_TEXTURE_BORDER_COLOR, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
|
||||
bind<NV4097_SET_RENDER_ENABLE, nv4097::set_render_mode>();
|
||||
bind<NV4097_SET_ZCULL_EN, nv4097::set_zcull_render_enable>();
|
||||
bind<NV4097_SET_ZCULL_STATS_ENABLE, nv4097::set_zcull_stats_enable>();
|
||||
|
|
Loading…
Reference in New Issue