diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index a006aea78a..b4cf28946c 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -466,15 +466,6 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con binary1.shadow_textures != binary2.shadow_textures || binary1.redirected_textures != binary2.redirected_textures) return false; - for (u8 index = 0; index < 16; ++index) - { - if (binary1.textures_alpha_kill[index] != binary2.textures_alpha_kill[index]) - return false; - - if (binary1.textures_zfunc[index] != binary2.textures_zfunc[index]) - return false; - } - const void* instBuffer1 = binary1.get_data(); const void* instBuffer2 = binary2.get_data(); size_t instIndex = 0; diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 9ee4e98322..271964f431 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -283,8 +283,11 @@ void GLGSRender::load_texture_env() surface_store_tag = m_rtts.cache_tag; } - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { + if (!(textures_ref & 1)) + continue; + if (!fs_sampler_state[i]) fs_sampler_state[i] = std::make_unique(); @@ -309,8 +312,11 @@ void GLGSRender::load_texture_env() } } - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { + if (!(textures_ref & 1)) + continue; + if (!vs_sampler_state[i]) vs_sampler_state[i] = std::make_unique(); @@ -341,75 +347,75 @@ void GLGSRender::bind_texture_env() // Bind textures and resolve external copy operations gl::command_context cmd{ gl_state }; - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - if (current_fp_metadata.referenced_textures_mask & (1 << i)) + if (!(textures_ref & 1)) + continue; + + _SelectTexture(GL_FRAGMENT_TEXTURES_START + i); + + gl::texture_view* view = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) { - _SelectTexture(GL_FRAGMENT_TEXTURES_START + i); - - gl::texture_view* view = nullptr; - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled() && - sampler_state->validate()) + if (view = sampler_state->image_handle; !view) [[unlikely]] { - if (view = sampler_state->image_handle; !view) [[unlikely]] - { - view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); - } + view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); } + } - if (view) [[likely]] + if (view) [[likely]] + { + view->bind(); + + if (current_fragment_program.redirected_textures & (1 << i)) { - view->bind(); + _SelectTexture(GL_STENCIL_MIRRORS_START + i); - if (current_fragment_program.redirected_textures & (1 << i)) - { - _SelectTexture(GL_STENCIL_MIRRORS_START + i); - - auto root_texture = static_cast(view->image()); - auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil); - stencil_view->bind(); - } + auto root_texture = static_cast(view->image()); + auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil); + stencil_view->bind(); } - else + } + else + { + auto target = gl::get_target(current_fragment_program.get_texture_dimension(i)); + glBindTexture(target, m_null_textures[target]->id()); + + if (current_fragment_program.redirected_textures & (1 << i)) { - auto target = gl::get_target(current_fragment_program.get_texture_dimension(i)); + _SelectTexture(GL_STENCIL_MIRRORS_START + i); glBindTexture(target, m_null_textures[target]->id()); - - if (current_fragment_program.redirected_textures & (1 << i)) - { - _SelectTexture(GL_STENCIL_MIRRORS_START + i); - glBindTexture(target, m_null_textures[target]->id()); - } } } } - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - if (current_vp_metadata.referenced_textures_mask & (1 << i)) - { - auto sampler_state = static_cast(vs_sampler_state[i].get()); - _SelectTexture(GL_VERTEX_TEXTURES_START + i); + if (!(textures_ref & 1)) + continue; - if (rsx::method_registers.vertex_textures[i].enabled() && - sampler_state->validate()) + auto sampler_state = static_cast(vs_sampler_state[i].get()); + _SelectTexture(GL_VERTEX_TEXTURES_START + i); + + if (rsx::method_registers.vertex_textures[i].enabled() && + sampler_state->validate()) + { + if (sampler_state->image_handle) [[likely]] { - if (sampler_state->image_handle) [[likely]] - { - sampler_state->image_handle->bind(); - } - else - { - m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); - } + sampler_state->image_handle->bind(); } else { - glBindTexture(GL_TEXTURE_2D, GL_NONE); + m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); } } + else + { + glBindTexture(GL_TEXTURE_2D, GL_NONE); + } } } @@ -584,6 +590,9 @@ void GLGSRender::emit_geometry(u32 sub_index) void GLGSRender::begin() { + // Save shader state now before prefetch and loading happens + m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits); + rsx::thread::begin(); if (skip_current_frame || cond_render_ctrl.disable_rendering()) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 341a06fe2e..f7b53c8320 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -648,7 +648,7 @@ bool GLGSRender::load_program() { const auto shadermode = g_cfg.video.shadermode.get(); - if ((m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits))) + if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 21526f1c16..6197731585 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -279,8 +279,6 @@ struct RSXFragmentProgram u32 texcoord_control_mask = 0; float texture_scale[16][4]; - u8 textures_alpha_kill[16]; - u8 textures_zfunc[16]; bool valid = false; @@ -303,8 +301,6 @@ struct RSXFragmentProgram RSXFragmentProgram() { std::memset(texture_scale, 0, sizeof(float) * 16 * 4); - std::memset(textures_alpha_kill, 0, sizeof(u8) * 16); - std::memset(textures_zfunc, 0, sizeof(u8) * 16); } static RSXFragmentProgram clone(const RSXFragmentProgram& prog) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 6b02f84969..520898c641 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -361,15 +361,16 @@ namespace rsx } } - if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty) + if (m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty) { // Request for update of fragment constants if the program block is invalidated m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty; - - // Request for update of texture parameters if the program is likely to have changed - m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; } + // Preload the GPU programs for this draw call if needed + prefetch_vertex_program(); + prefetch_fragment_program(); + in_begin_end = true; } @@ -783,7 +784,15 @@ namespace rsx void thread::fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program) { - memcpy(buffer, fragment_program.texture_scale, 16 * 4 * sizeof(float)); + // Copy only the relevant section + if (current_fp_metadata.referenced_textures_mask) + { + const auto start = std::countr_zero(current_fp_metadata.referenced_textures_mask); + const auto end = 16 - std::countl_zero(current_fp_metadata.referenced_textures_mask); + const auto mem_offset = (start * 16); + const auto mem_size = (end - start) * 16; + memcpy(static_cast(buffer) + mem_offset, reinterpret_cast(fragment_program.texture_scale) + mem_offset, mem_size); + } } u64 thread::timestamp() @@ -1490,15 +1499,48 @@ namespace rsx return true; } - void thread::get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures, bool skip_vertex_inputs) + void thread::prefetch_fragment_program() { - if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty)) + if (!(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty)) return; - m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty); + m_graphics_state &= ~rsx::pipeline_state::fragment_program_ucode_dirty; + + const auto [program_offset, program_location] = method_registers.shader_program_address(); + auto data_ptr = vm::base(rsx::get_address(program_offset, program_location, HERE)); + current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(data_ptr); + + current_fragment_program.data = (static_cast(data_ptr) + current_fp_metadata.program_start_offset); + current_fragment_program.offset = program_offset + current_fp_metadata.program_start_offset; + current_fragment_program.ucode_length = current_fp_metadata.program_ucode_length; + current_fragment_program.total_length = current_fp_metadata.program_ucode_length + current_fp_metadata.program_start_offset; + current_fragment_program.valid = true; + + if (!(m_graphics_state & rsx::pipeline_state::fragment_program_state_dirty)) + { + // Verify current texture state is valid + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) + { + if (!(textures_ref & 1)) continue; + + if (m_textures_dirty[i]) + { + m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; + break; + } + } + } + } + + void thread::prefetch_vertex_program() + { + if (!(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty)) + return; + + m_graphics_state &= ~rsx::pipeline_state::vertex_program_ucode_dirty; + const u32 transform_program_start = rsx::method_registers.transform_program_start(); - current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); - current_vertex_program.skip_vertex_input_check = skip_vertex_inputs; + current_vertex_program.skip_vertex_input_check = true; current_vertex_program.rsx_vertex_inputs.clear(); current_vertex_program.data.reserve(512 * 4); @@ -1512,59 +1554,38 @@ namespace rsx current_vertex_program // [out] Program object ); - if (!skip_textures && current_vp_metadata.referenced_textures_mask != 0) + if (!(m_graphics_state & rsx::pipeline_state::vertex_program_state_dirty)) { - for (u32 i = 0; i < rsx::limits::vertex_textures_count; ++i) + // Verify current texture state is valid + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - const auto &tex = rsx::method_registers.vertex_textures[i]; - if (tex.enabled() && (current_vp_metadata.referenced_textures_mask & (1 << i))) + if (!(textures_ref & 1)) continue; + + if (m_vertex_textures_dirty[i]) { - current_vertex_program.texture_dimensions |= (static_cast(sampler_descriptors[i]->image_type) << (i << 1)); + m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; + break; } } } + } - if (!skip_vertex_inputs) + void thread::get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors) + { + if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty)) + return; + + verify(HERE), !(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty); + current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); + + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); - const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + if (!(textures_ref & 1)) continue; - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + const auto &tex = rsx::method_registers.vertex_textures[i]; + if (tex.enabled() && (current_vp_metadata.referenced_textures_mask & (1 << i))) { - bool enabled = !!(input_mask & (1 << index)); - if (!enabled) - continue; - - if (rsx::method_registers.vertex_arrays_info[index].size() > 0) - { - current_vertex_program.rsx_vertex_inputs.push_back( - { index, - rsx::method_registers.vertex_arrays_info[index].size(), - rsx::method_registers.vertex_arrays_info[index].frequency(), - !!((modulo_mask >> index) & 0x1), - true, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); - } - else if (vertex_push_buffers[index].vertex_count > 1) - { - current_vertex_program.rsx_vertex_inputs.push_back( - { index, - vertex_push_buffers[index].size, - 1, - false, - true, - is_int_type(vertex_push_buffers[index].type), 0 }); - } - else if (rsx::method_registers.register_vertex_info[index].size > 0) - { - current_vertex_program.rsx_vertex_inputs.push_back( - { index, - rsx::method_registers.register_vertex_info[index].size, - rsx::method_registers.register_vertex_info[index].frequency, - !!((modulo_mask >> index) & 0x1), - false, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); - } + current_vertex_program.texture_dimensions |= (static_cast(sampler_descriptors[i]->image_type) << (i << 1)); } } } @@ -1740,44 +1761,42 @@ namespace rsx if (!(m_graphics_state & rsx::pipeline_state::fragment_program_dirty)) return; + verify(HERE), !(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty); + m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty); - auto &result = current_fragment_program = {}; - const auto [program_offset, program_location] = method_registers.shader_program_address(); + current_fragment_program.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); + current_fragment_program.texcoord_control_mask = rsx::method_registers.texcoord_control_mask(); + current_fragment_program.texture_dimensions = 0; + current_fragment_program.unnormalized_coords = 0; + current_fragment_program.two_sided_lighting = rsx::method_registers.two_side_light_en(); + current_fragment_program.redirected_textures = 0; + current_fragment_program.shadow_textures = 0; - result.data = vm::base(rsx::get_address(program_offset, program_location, HERE)); - current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.get_data()); - - result.data = (static_cast(result.get_data()) + current_fp_metadata.program_start_offset); - result.offset = program_offset + current_fp_metadata.program_start_offset; - result.ucode_length = current_fp_metadata.program_ucode_length; - result.total_length = result.ucode_length + current_fp_metadata.program_start_offset; - result.valid = true; - result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); - result.texcoord_control_mask = rsx::method_registers.texcoord_control_mask(); - result.unnormalized_coords = 0; - result.two_sided_lighting = rsx::method_registers.two_side_light_en(); - result.redirected_textures = 0; - result.shadow_textures = 0; + memset(current_fragment_program.texture_scale, 0, sizeof(current_fragment_program.texture_scale)); if (method_registers.current_draw_clause.primitive == primitive_type::points && method_registers.point_sprite_enabled()) { // Set high word of the control mask to store point sprite control - result.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16; + current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16; } - for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - auto &tex = rsx::method_registers.fragment_textures[i]; - result.texture_scale[i][0] = sampler_descriptors[i]->scale_x; - result.texture_scale[i][1] = sampler_descriptors[i]->scale_y; - result.texture_scale[i][2] = std::bit_cast(tex.remap()); + if (!(textures_ref & 1)) continue; - if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i))) + auto &tex = rsx::method_registers.fragment_textures[i]; + if (tex.enabled()) { + current_fragment_program.texture_scale[i][0] = sampler_descriptors[i]->scale_x; + current_fragment_program.texture_scale[i][1] = sampler_descriptors[i]->scale_y; + current_fragment_program.texture_scale[i][2] = std::bit_cast(tex.remap()); + + m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; + u32 texture_control = 0; - result.texture_dimensions |= (static_cast(sampler_descriptors[i]->image_type) << (i << 1)); + current_fragment_program.texture_dimensions |= (static_cast(sampler_descriptors[i]->image_type) << (i << 1)); if (tex.alpha_kill_enabled()) { @@ -1790,7 +1809,7 @@ namespace rsx const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); if (raw_format & CELL_GCM_TEXTURE_UN) - result.unnormalized_coords |= (1 << i); + current_fragment_program.unnormalized_coords |= (1 << i); if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) { @@ -1808,8 +1827,8 @@ namespace rsx // TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that) u32 control_bits = sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32? (1u << 16) : 0u; control_bits |= tex.remap() & 0xFFFF; - result.redirected_textures |= (1 << i); - result.texture_scale[i][2] = std::bit_cast(control_bits); + current_fragment_program.redirected_textures |= (1 << i); + current_fragment_program.texture_scale[i][2] = std::bit_cast(control_bits); break; } case CELL_GCM_TEXTURE_DEPTH16: @@ -1818,11 +1837,11 @@ namespace rsx case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { const auto compare_mode = tex.zfunc(); - if (result.textures_alpha_kill[i] == 0 && + if (!tex.alpha_kill_enabled() && compare_mode < rsx::comparison_function::always && compare_mode > rsx::comparison_function::never) { - result.shadow_textures |= (1 << i); + current_fragment_program.shadow_textures |= (1 << i); texture_control |= u32(tex.zfunc()) << 8; } break; @@ -1900,12 +1919,12 @@ namespace rsx #ifdef __APPLE__ texture_control |= (sampler_descriptors[i]->encoded_component_map() << 16); #endif - result.texture_scale[i][3] = std::bit_cast(texture_control); + current_fragment_program.texture_scale[i][3] = std::bit_cast(texture_control); } } //Sanity checks - if (result.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { //Check that the depth stage is not disabled if (!rsx::method_registers.depth_test_enabled()) @@ -1924,7 +1943,7 @@ namespace rsx address_range::start_length(dst_offset, size))) [[unlikely]] { // Data overlaps - m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; + m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty; return true; } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 91b69da44b..cb0ec16873 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -110,24 +110,28 @@ namespace rsx enum pipeline_state : u32 { - fragment_program_dirty = 0x1, // Fragment program changed - vertex_program_dirty = 0x2, // Vertex program changed - fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc) - vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc) - transform_constants_dirty = 0x10, // Transform constants changed - fragment_constants_dirty = 0x20, // Fragment constants changed - framebuffer_reads_dirty = 0x40, // Framebuffer contents changed - fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed - vertex_texture_state_dirty = 0x100, // Fragment texture parameters changed - scissor_config_state_dirty = 0x200, // Scissor region changed - zclip_config_state_dirty = 0x400, // Viewport Z clip changed + fragment_program_ucode_dirty = 0x1, // Fragment program ucode changed + vertex_program_ucode_dirty = 0x2, // Vertex program ucode changed + fragment_program_state_dirty = 0x4, // Fragment program state changed + vertex_program_state_dirty = 0x8, // Vertex program state changed + fragment_state_dirty = 0x10, // Fragment state changed (alpha test, etc) + vertex_state_dirty = 0x20, // Vertex state changed (scale_offset, clip planes, etc) + transform_constants_dirty = 0x40, // Transform constants changed + fragment_constants_dirty = 0x80, // Fragment constants changed + framebuffer_reads_dirty = 0x100, // Framebuffer contents changed + fragment_texture_state_dirty = 0x200, // Fragment texture parameters changed + vertex_texture_state_dirty = 0x400, // Fragment texture parameters changed + scissor_config_state_dirty = 0x800, // Scissor region changed + zclip_config_state_dirty = 0x1000, // Viewport Z clip changed - scissor_setup_invalid = 0x800, // Scissor configuration is broken - scissor_setup_clipped = 0x1000, // Scissor region is cropped by viewport constraint + scissor_setup_invalid = 0x2000, // Scissor configuration is broken + scissor_setup_clipped = 0x4000, // Scissor region is cropped by viewport constraint - polygon_stipple_pattern_dirty = 0x2000, // Rasterizer stippling pattern changed - line_stipple_pattern_dirty = 0x4000, // Line stippling pattern changed + polygon_stipple_pattern_dirty = 0x8000, // Rasterizer stippling pattern changed + line_stipple_pattern_dirty = 0x10000, // Line stippling pattern changed + fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, + vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty, memory_barrier_bits = framebuffer_reads_dirty, @@ -767,7 +771,13 @@ namespace rsx RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; - void get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures = false, bool skip_vertex_inputs = true); + // Prefetch and analyze the currently active fragment program ucode + void prefetch_fragment_program(); + + // Prefetch and analyze the currently active vertex program ucode + void prefetch_vertex_program(); + + void get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors); /** * Gets current fragment program and associated fragment state diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 4d7e59f501..dc9b00d0f5 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -152,8 +152,11 @@ void VKGSRender::load_texture_env() surface_store_tag = m_rtts.cache_tag; } - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { + if (!(textures_ref & 1)) + continue; + if (!fs_sampler_state[i]) fs_sampler_state[i] = std::make_unique(); @@ -289,8 +292,11 @@ void VKGSRender::load_texture_env() } } - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { + if (!(textures_ref & 1)) + continue; + if (!vs_sampler_state[i]) vs_sampler_state[i] = std::make_unique(); @@ -357,217 +363,217 @@ void VKGSRender::load_texture_env() void VKGSRender::bind_texture_env() { - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - if (current_fp_metadata.referenced_textures_mask & (1 << i)) + if (!(textures_ref & 1)) + continue; + + vk::image_view* view = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) { - vk::image_view* view = nullptr; - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled() && - sampler_state->validate()) + if (view = sampler_state->image_handle; !view) { - if (view = sampler_state->image_handle; !view) - { - //Requires update, copy subresource - view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); - } - else - { - switch (auto raw = view->image(); raw->current_layout) - { - default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_GENERAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - if (!sampler_state->is_cyclic_reference) - { - // This was used in a cyclic ref before, but is missing a barrier - // No need for a full stall, use a custom barrier instead - VkPipelineStageFlags src_stage; - VkAccessFlags src_access; - if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) - { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); - - raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - } - } - - if (view) [[likely]] - { - m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); - - if (current_fragment_program.redirected_textures & (1 << i)) - { - // Stencil mirror required - auto root_image = static_cast(view->image()); - auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); - - if (!m_stencil_mirror_sampler) - { - m_stencil_mirror_sampler = std::make_unique(*m_device, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_FALSE, 0.f, 1.f, 0.f, 0.f, - VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_BORDER_COLOR_INT_OPAQUE_BLACK); - } - - m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); - } + //Requires update, copy subresource + view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); } else { - const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); + switch (auto raw = view->image(); raw->current_layout) + { + default: + //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_GENERAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + if (!sampler_state->is_cyclic_reference) + { + // This was used in a cyclic ref before, but is missing a barrier + // No need for a full stall, use a custom barrier instead + VkPipelineStageFlags src_stage; + VkAccessFlags src_access; + if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) + { + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + else + { + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + vk::insert_image_memory_barrier( + *m_current_command_buffer, + raw->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, + { raw->aspect(), 0, 1, 0, 1 }); + + raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + } + } + } + + if (view) [[likely]] + { + m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set); + + if (current_fragment_program.redirected_textures & (1 << i)) + { + // Stencil mirror required + auto root_image = static_cast(view->image()); + auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); + + if (!m_stencil_mirror_sampler) + { + m_stencil_mirror_sampler = std::make_unique(*m_device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_FALSE, 0.f, 1.f, 0.f, 0.f, + VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, + VK_BORDER_COLOR_INT_OPAQUE_BLACK); + } + + m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set, + true); + } + } + else + { + const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set); + + if (current_fragment_program.redirected_textures & (1 << i)) + { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); - - if (current_fragment_program.redirected_textures & (1 << i)) - { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); - } + m_current_frame->descriptor_set, + true); } } } - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - if (current_vp_metadata.referenced_textures_mask & (1 << i)) + if (!(textures_ref & 1)) + continue; + + if (!rsx::method_registers.vertex_textures[i].enabled()) { - if (!rsx::method_registers.vertex_textures[i].enabled()) - { - const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); - - continue; - } - - auto sampler_state = static_cast(vs_sampler_state[i].get()); - auto image_ptr = sampler_state->image_handle; - - if (!image_ptr && sampler_state->validate()) - { - image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); - m_vertex_textures_dirty[i] = true; - } - - if (!image_ptr) - { - rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i); - const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); - - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); - - continue; - } - - switch (auto raw = image_ptr->image(); raw->current_layout) - { - default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_GENERAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - if (!sampler_state->is_cyclic_reference) - { - // Custom barrier, see similar block in FS stage - VkPipelineStageFlags src_stage; - VkAccessFlags src_access; - if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) - { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); - - raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - - m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, + const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, ::glsl::program_domain::glsl_vertex_program, m_current_frame->descriptor_set); + + continue; } + + auto sampler_state = static_cast(vs_sampler_state[i].get()); + auto image_ptr = sampler_state->image_handle; + + if (!image_ptr && sampler_state->validate()) + { + image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); + m_vertex_textures_dirty[i] = true; + } + + if (!image_ptr) + { + rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i); + const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); + + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + i, + ::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); + + continue; + } + + switch (auto raw = image_ptr->image(); raw->current_layout) + { + default: + //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_GENERAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + if (!sampler_state->is_cyclic_reference) + { + // Custom barrier, see similar block in FS stage + VkPipelineStageFlags src_stage; + VkAccessFlags src_access; + if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) + { + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + else + { + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + vk::insert_image_memory_barrier( + *m_current_command_buffer, + raw->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, + { raw->aspect(), 0, 1, 0, 1 }); + + raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + } + + m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, + i, + ::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); } } @@ -605,81 +611,81 @@ void VKGSRender::bind_interpreter_texture_env() std::advance(end, 16); std::fill(start, end, fallback); - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { - if (current_fp_metadata.referenced_textures_mask & (1 << i)) + if (!(textures_ref & 1)) + continue; + + vk::image_view* view = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) { - vk::image_view* view = nullptr; - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled() && - sampler_state->validate()) + if (view = sampler_state->image_handle; !view) { - if (view = sampler_state->image_handle; !view) + //Requires update, copy subresource + view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); + } + else + { + switch (auto raw = view->image(); raw->current_layout) { - //Requires update, copy subresource - view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); - } - else - { - switch (auto raw = view->image(); raw->current_layout) + default: + //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_GENERAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + if (!sampler_state->is_cyclic_reference) { - default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_GENERAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - if (!sampler_state->is_cyclic_reference) + // This was used in a cyclic ref before, but is missing a barrier + // No need for a full stall, use a custom barrier instead + VkPipelineStageFlags src_stage; + VkAccessFlags src_access; + if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) { - // This was used in a cyclic ref before, but is missing a barrier - // No need for a full stall, use a custom barrier instead - VkPipelineStageFlags src_stage; - VkAccessFlags src_access; - if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) - { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); - - raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } - break; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; + else + { + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + vk::insert_image_memory_barrier( + *m_current_command_buffer, + raw->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, + { raw->aspect(), 0, 1, 0, 1 }); + + raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; } } + } - if (view) - { - const int offsets[] = { 0, 16, 48, 32 }; - auto& sampled_image_info = texture_env[offsets[static_cast(sampler_state->image_type)] + i]; - sampled_image_info = { fs_sampler_handles[i]->value, view->value, view->image()->current_layout }; - } + if (view) + { + const int offsets[] = { 0, 16, 48, 32 }; + auto& sampled_image_info = texture_env[offsets[static_cast(sampler_state->image_type)] + i]; + sampled_image_info = { fs_sampler_handles[i]->value, view->value, view->image()->current_layout }; } } @@ -867,6 +873,9 @@ void VKGSRender::emit_geometry(u32 sub_index) void VKGSRender::begin() { + // Save shader state now before prefetch and loading happens + m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits); + rsx::thread::begin(); if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering()) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index df8a99876b..547d48e846 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1467,7 +1467,7 @@ void VKGSRender::do_local_task(rsx::FIFO_state state) bool VKGSRender::load_program() { - if ((m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits))) + if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index ac2c73413e..54f0bab7d2 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -414,8 +414,8 @@ namespace rsx u16 fp_lighting_flags; u16 fp_shadow_textures; u16 fp_redirected_textures; - u16 fp_alphakill_mask; - u64 fp_zfunc_mask; + u16 unused_0; // Retained for binary compatibility + u64 unused_1; // Retained for binary compatibility pipeline_storage_type pipeline_properties; }; @@ -665,8 +665,6 @@ namespace rsx state_hash ^= rpcs3::hash_base(data.fp_lighting_flags); state_hash ^= rpcs3::hash_base(data.fp_shadow_textures); state_hash ^= rpcs3::hash_base(data.fp_redirected_textures); - state_hash ^= rpcs3::hash_base(data.fp_alphakill_mask); - state_hash ^= rpcs3::hash_base(data.fp_zfunc_mask); std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash); std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name; @@ -740,12 +738,6 @@ namespace rsx fp.shadow_textures = data.fp_shadow_textures; fp.redirected_textures = data.fp_redirected_textures; - for (u8 index = 0; index < 16; ++index) - { - fp.textures_alpha_kill[index] = (data.fp_alphakill_mask & (1 << index))? 1: 0; - fp.textures_zfunc[index] = (data.fp_zfunc_mask >> (index << 2)) & 0xF; - } - return std::make_tuple(pipeline, vp, fp); } @@ -790,12 +782,6 @@ namespace rsx data_block.fp_shadow_textures = fp.shadow_textures; data_block.fp_redirected_textures = fp.redirected_textures; - for (u8 index = 0; index < 16; ++index) - { - data_block.fp_alphakill_mask |= u32(fp.textures_alpha_kill[index] & 0x1) << index; - data_block.fp_zfunc_mask |= u64(fp.textures_zfunc[index] & 0xF) << (index << 2); - } - return data_block; } }; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 4417cc2073..9188c10049 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -481,7 +481,7 @@ namespace rsx stream_data_to_memory_swapped_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4] , vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount, 4); - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4)); rsx->fifo_ctrl->skip_methods(count - 1); } @@ -491,7 +491,7 @@ namespace rsx { if (method_registers.registers[reg] != method_registers.register_previous_value) { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; } } @@ -499,7 +499,7 @@ namespace rsx { if (method_registers.registers[reg] != method_registers.register_previous_value) { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty | rsx::pipeline_state::fragment_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; } } @@ -683,7 +683,7 @@ namespace rsx void set_shader_program_dirty(thread* rsx, u32, u32) { - rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty; } void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg) @@ -863,7 +863,7 @@ namespace rsx if (rsx->current_fp_metadata.referenced_textures_mask & (1 << index)) { - rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; } } }; @@ -877,7 +877,7 @@ namespace rsx if (rsx->current_vp_metadata.referenced_textures_mask & (1 << index)) { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; } } }; @@ -3156,6 +3156,10 @@ namespace rsx bind(); bind(); bind(); + bind>(); + bind_array>(); + bind>(); + bind>(); bind(); bind(); bind();