diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt
index c1d0e2e037..bf66bb4ae1 100644
--- a/rpcs3/Emu/CMakeLists.txt
+++ b/rpcs3/Emu/CMakeLists.txt
@@ -409,11 +409,13 @@ target_sources(rpcs3_emu PRIVATE
 	RSX/Capture/rsx_capture.cpp
 	RSX/Capture/rsx_replay.cpp
 	RSX/GL/GLCommonDecompiler.cpp
+	RSX/GL/GLDraw.cpp
 	RSX/GL/GLFragmentProgram.cpp
 	RSX/GL/GLGSRender.cpp
 	RSX/GL/GLHelpers.cpp
 	RSX/GL/GLPresent.cpp
 	RSX/GL/GLRenderTargets.cpp
+	RSX/GL/GLShaderInterpreter.cpp
 	RSX/GL/GLTexture.cpp
 	RSX/GL/GLVertexBuffers.cpp
 	RSX/GL/GLVertexProgram.cpp
@@ -425,6 +427,7 @@ if(TARGET 3rdparty_vulkan)
 	RSX/VK/VKCommandStream.cpp
 	RSX/VK/VKCommonDecompiler.cpp
 	RSX/VK/VKDMA.cpp
+	RSX/VK/VKDraw.cpp
 	RSX/VK/VKFormats.cpp
 	RSX/VK/VKFragmentProgram.cpp
 	RSX/VK/VKFramebuffer.cpp
@@ -436,6 +439,7 @@ if(TARGET 3rdparty_vulkan)
 	RSX/VK/VKRenderPass.cpp
 	RSX/VK/VKResolveHelper.cpp
 	RSX/VK/VKResourceManager.cpp
+	RSX/VK/VKShaderInterpreter.cpp
 	RSX/VK/VKTexture.cpp
 	RSX/VK/VKVertexBuffers.cpp
 	RSX/VK/VKVertexProgram.cpp
diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp
new file mode 100644
index 0000000000..ccbcb7732c
--- /dev/null
+++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@@ -0,0 +1,746 @@
+#include "stdafx.h"
+#include "GLGSRender.h"
+#include "../Common/BufferUtils.h"
+
+namespace gl
+{
+	GLenum comparison_op(rsx::comparison_function op)
+	{
+		switch (op)
+		{
+		case rsx::comparison_function::never: return GL_NEVER;
+		case rsx::comparison_function::less: return GL_LESS;
+		case rsx::comparison_function::equal: return GL_EQUAL;
+		case rsx::comparison_function::less_or_equal: return GL_LEQUAL;
+		case rsx::comparison_function::greater: return GL_GREATER;
+		case rsx::comparison_function::not_equal: return GL_NOTEQUAL;
+		case rsx::comparison_function::greater_or_equal: return GL_GEQUAL;
+		case rsx::comparison_function::always: return GL_ALWAYS;
+		}
+		fmt::throw_exception("Unsupported comparison op 0x%X" HERE, static_cast<u32>(op));
+	}
+
+	GLenum stencil_op(rsx::stencil_op op)
+	{
+		switch (op)
+		{
+		case rsx::stencil_op::invert: return GL_INVERT;
+		case rsx::stencil_op::keep: return GL_KEEP;
+		case rsx::stencil_op::zero: return GL_ZERO;
+		case rsx::stencil_op::replace: return GL_REPLACE;
+		case rsx::stencil_op::incr: return GL_INCR;
+		case rsx::stencil_op::decr: return GL_DECR;
+		case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP;
+		case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP;
+		}
+		fmt::throw_exception("Unsupported stencil op 0x%X" HERE, static_cast<u32>(op));
+	}
+
+	GLenum blend_equation(rsx::blend_equation op)
+	{
+		switch (op)
+		{
+		// Note: GL might support signed add natively; the signed variants are approximated for now
+		case rsx::blend_equation::add_signed:
+			rsx_log.trace("blend equation add_signed used. Emulating using FUNC_ADD");
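+			// Intentional fallthrough: the signed variant is approximated with the plain ADD equation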
+		case rsx::blend_equation::add: return GL_FUNC_ADD;
+		case rsx::blend_equation::min: return GL_MIN;
+		case rsx::blend_equation::max: return GL_MAX;
+		case rsx::blend_equation::substract: return GL_FUNC_SUBTRACT;
+		case rsx::blend_equation::reverse_substract_signed:
+			rsx_log.trace("blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
+		case rsx::blend_equation::reverse_substract: return GL_FUNC_REVERSE_SUBTRACT;
+		case rsx::blend_equation::reverse_add_signed:
+		default:
+			rsx_log.error("Blend equation 0x%X is unimplemented!", static_cast<u32>(op));
+			return GL_FUNC_ADD;
+		}
+	}
+
+	GLenum blend_factor(rsx::blend_factor op)
+	{
+		switch (op)
+		{
+		case rsx::blend_factor::zero: return GL_ZERO;
+		case rsx::blend_factor::one: return GL_ONE;
+		case rsx::blend_factor::src_color: return GL_SRC_COLOR;
+		case rsx::blend_factor::one_minus_src_color: return GL_ONE_MINUS_SRC_COLOR;
+		case rsx::blend_factor::dst_color: return GL_DST_COLOR;
+		case rsx::blend_factor::one_minus_dst_color: return GL_ONE_MINUS_DST_COLOR;
+		case rsx::blend_factor::src_alpha: return GL_SRC_ALPHA;
+		case rsx::blend_factor::one_minus_src_alpha: return GL_ONE_MINUS_SRC_ALPHA;
+		case rsx::blend_factor::dst_alpha: return GL_DST_ALPHA;
+		case rsx::blend_factor::one_minus_dst_alpha: return GL_ONE_MINUS_DST_ALPHA;
+		case rsx::blend_factor::src_alpha_saturate: return GL_SRC_ALPHA_SATURATE;
+		case rsx::blend_factor::constant_color: return GL_CONSTANT_COLOR;
+		case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR;
+		case rsx::blend_factor::constant_alpha: return GL_CONSTANT_ALPHA;
+		case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA;
+		}
+		fmt::throw_exception("Unsupported blend factor 0x%X" HERE, static_cast<u32>(op));
+	}
+
+	GLenum logic_op(rsx::logic_op op)
+	{
+		switch (op)
+		{
+		case rsx::logic_op::logic_clear: return GL_CLEAR;
+		case rsx::logic_op::logic_and: return GL_AND;
+		case rsx::logic_op::logic_and_reverse: return GL_AND_REVERSE;
+		case rsx::logic_op::logic_copy: return GL_COPY;
+		case rsx::logic_op::logic_and_inverted: return GL_AND_INVERTED;
+		case rsx::logic_op::logic_noop: return GL_NOOP;
+		case rsx::logic_op::logic_xor: return GL_XOR;
+		case rsx::logic_op::logic_or: return GL_OR;
+		case rsx::logic_op::logic_nor: return GL_NOR;
+		case rsx::logic_op::logic_equiv: return GL_EQUIV;
+		case rsx::logic_op::logic_invert: return GL_INVERT;
+		case rsx::logic_op::logic_or_reverse: return GL_OR_REVERSE;
+		case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED;
+		case rsx::logic_op::logic_or_inverted: return GL_OR_INVERTED;
+		case rsx::logic_op::logic_nand: return GL_NAND;
+		case rsx::logic_op::logic_set: return GL_SET;
+		}
+		fmt::throw_exception("Unsupported logic op 0x%X" HERE, static_cast<u32>(op));
+	}
+
+	GLenum front_face(rsx::front_face op)
+	{
+		//NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left
+		//shader_window_origin register does not affect this
+		//verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom)
+		//correctness of face winding checked using stencil test (GOW collection shadows)
+		switch (op)
+		{
+		case rsx::front_face::cw: return GL_CCW;
+		case rsx::front_face::ccw: return GL_CW;
+		}
+		fmt::throw_exception("Unsupported front face 0x%X" HERE, static_cast<u32>(op));
+	}
+
+	GLenum cull_face(rsx::cull_face op)
+	{
+		switch (op)
+		{
+		case rsx::cull_face::front: return GL_FRONT;
+		case rsx::cull_face::back: return GL_BACK;
+		case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK;
+		}
+		fmt::throw_exception("Unsupported cull face 0x%X" HERE, static_cast<u32>(op));
+	}
+}
+
+void GLGSRender::update_draw_state()
+{
+	m_profiler.start();
+
+	for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
+	{
+		bool color_mask_b = rsx::method_registers.color_mask_b(index);
+		bool color_mask_g = rsx::method_registers.color_mask_g(index);
+		bool color_mask_r = rsx::method_registers.color_mask_r(index);
+		bool color_mask_a = rsx::method_registers.color_mask_a(index);
+
+		if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8)
+		{
+			//Map GB components onto RG
+			rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
+		}
+
+		gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
+	}
+
+	gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
+	gl_state.stencil_mask(rsx::method_registers.stencil_mask());
+
+	gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP);
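+	// GL_DEPTH_CLAMP both disables near/far clipping and clamps depth output, which approximates RSX rendering with depth clip disabled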
+	if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
+	{
+		gl_state.depth_func(gl::comparison_op(rsx::method_registers.depth_func()));
+	}
+
+	if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
+	{
+		gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
+	}
+
+	gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);
+
+	if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
+	{
+		glStencilFunc(gl::comparison_op(rsx::method_registers.stencil_func()),
+			rsx::method_registers.stencil_func_ref(),
+			rsx::method_registers.stencil_func_mask());
+
+		glStencilOp(gl::stencil_op(rsx::method_registers.stencil_op_fail()), gl::stencil_op(rsx::method_registers.stencil_op_zfail()),
+			gl::stencil_op(rsx::method_registers.stencil_op_zpass()));
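+		// glStencilFunc/glStencilOp above program both faces; the back face is overridden below when two-sided stencil is enabled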
+		if (rsx::method_registers.two_sided_stencil_test_enabled())
+		{
+			glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());
+
+			glStencilFuncSeparate(GL_BACK, gl::comparison_op(rsx::method_registers.back_stencil_func()),
+				rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());
+
+			glStencilOpSeparate(GL_BACK, gl::stencil_op(rsx::method_registers.back_stencil_op_fail()),
+				gl::stencil_op(rsx::method_registers.back_stencil_op_zfail()), gl::stencil_op(rsx::method_registers.back_stencil_op_zpass()));
+		}
+	}
+
+	bool mrt_blend_enabled[] =
+	{
+		rsx::method_registers.blend_enabled(),
+		rsx::method_registers.blend_enabled_surface_1(),
+		rsx::method_registers.blend_enabled_surface_2(),
+		rsx::method_registers.blend_enabled_surface_3()
+	};
+
+	if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
+	{
+		glBlendFuncSeparate(gl::blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
+			gl::blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
+			gl::blend_factor(rsx::method_registers.blend_func_sfactor_a()),
+			gl::blend_factor(rsx::method_registers.blend_func_dfactor_a()));
+
+		auto blend_colors = rsx::get_constant_blend_colors();
+		glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);
+
+		glBlendEquationSeparate(gl::blend_equation(rsx::method_registers.blend_equation_rgb()),
+			gl::blend_equation(rsx::method_registers.blend_equation_a()));
+	}
+
+	gl_state.enablei(mrt_blend_enabled[0], GL_BLEND, 0);
+	gl_state.enablei(mrt_blend_enabled[1], GL_BLEND, 1);
+	gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
+	gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);
+
+	if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
+	{
+		gl_state.logic_op(gl::logic_op(rsx::method_registers.logic_operation()));
+	}
+
+	gl_state.line_width(rsx::method_registers.line_width());
+	gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);
+
+	gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
+	gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
+	gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
+
+	//offset_bias is the constant factor, multiplied by the implementation factor R
+	//offset_scale is the slope factor, multiplied by the triangle slope factor M
+	gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
+
+	if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
+	{
+		gl_state.cull_face(gl::cull_face(rsx::method_registers.cull_face_mode()));
+	}
+
+	gl_state.front_face(gl::front_face(rsx::method_registers.front_face_mode()));
+
+	// Sample control
+	// TODO: MinSampleShading
+	//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
+	//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
+	//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);
+
+	//TODO
+	//NV4097_SET_ANISO_SPREAD
+	//NV4097_SET_SPECULAR_ENABLE
+	//NV4097_SET_TWO_SIDE_LIGHT_EN
+	//NV4097_SET_FLAT_SHADE_OP
+	//NV4097_SET_EDGE_FLAG
+
+	//NV4097_SET_COLOR_KEY_COLOR
+	//NV4097_SET_SHADER_CONTROL
+	//NV4097_SET_ZMIN_MAX_CONTROL
+	//NV4097_SET_ANTI_ALIASING_CONTROL
+	//NV4097_SET_CLIP_ID_TEST_ENABLE
+
+	m_frame_stats.setup_time += m_profiler.duration();
+}
+
+void GLGSRender::load_texture_env()
+{
+	// Load textures
+	gl::command_context cmd{ gl_state };
+	bool update_framebuffer_sourced = false;
+
+	std::lock_guard lock(m_sampler_mutex);
+
+	if (surface_store_tag != m_rtts.cache_tag)
+	{
+		update_framebuffer_sourced = true;
+		surface_store_tag = m_rtts.cache_tag;
+	}
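+	// Sampler descriptors are refreshed lazily: only dirty slots, plus framebuffer-sourced slots whenever the surface store generation changed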
+	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
+	{
+		if (!fs_sampler_state[i])
+			fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
+
+		if (m_samplers_dirty || m_textures_dirty[i] ||
+			(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
+		{
+			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
+
+			if (rsx::method_registers.fragment_textures[i].enabled())
+			{
+				*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);
+
+				if (m_textures_dirty[i])
+					m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
+			}
+			else
+			{
+				*sampler_state = {};
+			}
+
+			m_textures_dirty[i] = false;
+		}
+	}
+
+	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
+	{
+		if (!vs_sampler_state[i])
+			vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
+
+		if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
+			(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
+		{
+			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
+
+			if (rsx::method_registers.vertex_textures[i].enabled())
+			{
+				*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);
+
+				if (m_vertex_textures_dirty[i])
+					m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
+			}
+			else
+				*sampler_state = {};
+
+			m_vertex_textures_dirty[i] = false;
+		}
+	}
+
+	m_samplers_dirty.store(false);
+}
+
+void GLGSRender::bind_texture_env()
+{
+	// Bind textures and resolve external copy operations
+	gl::command_context cmd{ gl_state };
+
+	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
+	{
+		if (current_fp_metadata.referenced_textures_mask & (1 << i))
+		{
+			_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
+
+			gl::texture_view* view = nullptr;
+			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
+
+			if (rsx::method_registers.fragment_textures[i].enabled() &&
+				sampler_state->validate())
+			{
+				if (view = sampler_state->image_handle; !view) [[unlikely]]
+				{
+					view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
+				}
+			}
+
+			if (view) [[likely]]
+			{
+				view->bind();
+
+				if (current_fragment_program.redirected_textures & (1 << i))
+				{
+					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
+
+					auto root_texture = static_cast<gl::viewable_image*>(view->image());
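+					// 0xAAE4 encodes the identity channel remap; bind a stencil-aspect view of the same image for shaders that read the stencil plane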
+					auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
+					stencil_view->bind();
+				}
+			}
+			else
+			{
+				auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
+				glBindTexture(target, m_null_textures[target]->id());
+
+				if (current_fragment_program.redirected_textures & (1 << i))
+				{
+					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
+					glBindTexture(target, m_null_textures[target]->id());
+				}
+			}
+		}
+	}
+
+	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
+	{
+		if (current_vp_metadata.referenced_textures_mask & (1 << i))
+		{
+			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
+			_SelectTexture(GL_VERTEX_TEXTURES_START + i);
+
+			if (rsx::method_registers.vertex_textures[i].enabled() &&
+				sampler_state->validate())
+			{
+				if (sampler_state->image_handle) [[likely]]
+				{
+					sampler_state->image_handle->bind();
+				}
+				else
+				{
+					m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
+				}
+			}
+			else
+			{
+				glBindTexture(GL_TEXTURE_2D, GL_NONE);
+			}
+		}
+	}
+}
+
+void GLGSRender::emit_geometry(u32 sub_index)
+{
+	const auto do_heap_cleanup = [this]()
+	{
+		if (manually_flush_ring_buffers)
+		{
+			m_attrib_ring_buffer->unmap();
+			m_index_ring_buffer->unmap();
+		}
+		else
+		{
+			//DMA push; not needed with MAP_COHERENT
+			//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+		}
+	};
+
+	if (!sub_index)
+	{
+		analyse_inputs_interleaved(m_vertex_layout);
+		if (!m_vertex_layout.validate())
+		{
+			// Execute remaining pipeline barriers with NOP draw
+			do
+			{
+				rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
+			}
+			while (rsx::method_registers.current_draw_clause.next());
+
+			rsx::method_registers.current_draw_clause.end();
+			return;
+		}
+	}
+	else
+	{
+		if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
+		{
+			// Rebase vertex bases instead of re-analysing the vertex layout
+			for (auto &info : m_vertex_layout.interleaved_blocks)
+			{
+				const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
+				info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
+			}
+		}
+	}
+
+	if (manually_flush_ring_buffers)
+	{
+		//Use approximations to reserve space. This path is mostly for debug purposes anyway
+		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
+		u32 approx_working_buffer_size = approx_vertex_count * 256;
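+		// 256 bytes per vertex is the worst case: 16 attributes * 16 bytes (a vec4 of 32-bit components) each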
+		//Allocate 256K heap if we have no approximation at this time (inlined array)
+		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
+		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
+	}
+
+	// Do vertex upload before RTT prep / texture lookups to give the driver time to push data
+	auto upload_info = set_vertex_buffer();
+	do_heap_cleanup();
+
+	if (upload_info.vertex_draw_count == 0)
+	{
+		// Malformed vertex setup; abort
+		return;
+	}
+
+	const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
+	update_vertex_env(upload_info);
+
+	if (!upload_info.index_info)
+	{
+		if (rsx::method_registers.current_draw_clause.is_single_draw())
+		{
+			glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
+		}
+		else
+		{
+			const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
+			const auto draw_count = subranges.size();
+			const auto driver_caps = gl::get_driver_caps();
+			bool use_draw_arrays_fallback = false;
+
+			m_scratch_buffer.resize(draw_count * 24);
+			GLint* firsts = reinterpret_cast<GLint*>(m_scratch_buffer.data());
+			GLsizei* counts = (firsts + draw_count);
+			const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
+
+			u32 first = 0;
+			u32 dst_index = 0;
+			for (const auto &range : subranges)
+			{
+				firsts[dst_index] = first;
+				counts[dst_index] = range.count;
+				offsets[dst_index++] = reinterpret_cast<const GLvoid*>(u64{first << 2});
+
+				if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
+				{
+					//Unlikely, but added here in case the identity buffer is not large enough somehow
+					use_draw_arrays_fallback = true;
+					break;
+				}
+
+				first += range.count;
+			}
+
+			if (use_draw_arrays_fallback)
+			{
+				//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
+				for (u32 n = 0; n < draw_count; ++n)
+				{
+					glDrawArrays(draw_mode, firsts[n], counts[n]);
+				}
+			}
+			else if (driver_caps.vendor_AMD)
+			{
+				//Use identity index buffer to fix broken vertexID on AMD
+				m_identity_index_buffer->bind();
+				glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, static_cast<GLsizei>(draw_count));
+			}
+			else
+			{
+				//Normal render
+				glMultiDrawArrays(draw_mode, firsts, counts, static_cast<GLsizei>(draw_count));
+			}
+		}
+	}
+	else
+	{
+		const GLenum index_type = std::get<0>(*upload_info.index_info);
+		const u32 index_offset = std::get<1>(*upload_info.index_info);
+		const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
+
+		if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
+		{
+			glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
+		}
+
+		m_index_ring_buffer->bind();
+
+		if (rsx::method_registers.current_draw_clause.is_single_draw())
+		{
+			glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
+		}
+		else
+		{
+			const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
+			const auto draw_count = subranges.size();
+			const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
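+			// type_scale is a shift amount: u16 indices advance 2 bytes per element (<<1), u32 indices 4 bytes (<<2)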
+			uintptr_t index_ptr = index_offset;
+			m_scratch_buffer.resize(draw_count * 16);
+
+			GLsizei *counts = reinterpret_cast<GLsizei*>(m_scratch_buffer.data());
+			const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
+			int dst_index = 0;
+
+			for (const auto &range : subranges)
+			{
+				const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
+				counts[dst_index] = index_size;
+				offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr);
+
+				index_ptr += (index_size << type_scale);
+			}
+
+			glMultiDrawElements(draw_mode, counts, index_type, offsets, static_cast<GLsizei>(draw_count));
+		}
+	}
+}
+
+void GLGSRender::begin()
+{
+	rsx::thread::begin();
+
+	if (skip_current_frame || cond_render_ctrl.disable_rendering())
+		return;
+
+	init_buffers(rsx::framebuffer_creation_context::context_draw);
+}
+
+void GLGSRender::end()
+{
+	m_profiler.start();
+
+	if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering())
+	{
+		execute_nop_draw();
+		rsx::thread::end();
+		return;
+	}
+
+	m_frame_stats.setup_time += m_profiler.duration();
+
+	gl::command_context cmd{ gl_state };
+	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
+
+	// Handle special memory barrier for ARGB8->D24S8 in an active DSV
+	if (ds && ds->old_contents.size() == 1 &&
+		ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
+	{
+		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
+
+		// TODO: Stencil transfer
+		gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
+		ds->old_contents[0].init_transfer(ds);
+
+		m_depth_converter.run(ds->old_contents[0].src_rect(),
+			ds->old_contents[0].dst_rect(),
+			ds->old_contents[0].source, ds);
+
+		ds->on_write();
+	}
+
+	// Active texture environment is used to decode shaders
+	m_profiler.start();
+	load_texture_env();
+	m_frame_stats.textures_upload_time += m_profiler.duration();
+
+	// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
+	// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
+	if (!load_program())
+	{
+		// Program is not ready, skip drawing this
+		std::this_thread::yield();
+		execute_nop_draw();
+		// m_rtts.on_write(); - breaks games for obvious reasons
+		rsx::thread::end();
+		return;
+	}
+
+	// Load program execution environment
+	load_program_env();
+	m_frame_stats.setup_time += m_profiler.duration();
+
+	bind_texture_env();
+	m_gl_texture_cache.release_uncached_temporary_subresources();
+	m_frame_stats.textures_upload_time += m_profiler.duration();
+
+	// Optionally do memory synchronization if the texture stage has not yet triggered this
+	if (true)//g_cfg.video.strict_rendering_mode)
+	{
+		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
+
+		if (ds) ds->write_barrier(cmd);
+
+		for (auto &rtt : m_rtts.m_bound_render_targets)
+		{
+			if (auto surface = std::get<1>(rtt))
+			{
+				surface->write_barrier(cmd);
+			}
+		}
+	}
+	else
+	{
+		rsx::simple_array<int> buffers_to_clear;
+		bool clear_all_color = true;
+		bool clear_depth = false;
+
+		for (int index = 0; index < 4; index++)
+		{
+			if (m_rtts.m_bound_render_targets[index].first)
+			{
+				if (!m_rtts.m_bound_render_targets[index].second->dirty())
+					clear_all_color = false;
+				else
+					buffers_to_clear.push_back(index);
+			}
+		}
+
+		if (ds && ds->dirty())
+		{
+			clear_depth = true;
+		}
+
+		if (clear_depth || !buffers_to_clear.empty())
+		{
+			gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
+			GLenum mask = 0;
+
+			if (clear_depth)
+			{
+				gl_state.depth_mask(GL_TRUE);
+				gl_state.clear_depth(1.f);
+				gl_state.clear_stencil(255);
+				mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+			}
+
+			if (clear_all_color)
+				mask |= GL_COLOR_BUFFER_BIT;
+
+			glClear(mask);
+
+			if (!buffers_to_clear.empty() && !clear_all_color)
+			{
+				GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
+				//It is impossible for the render target to be type A or B here (clear all would have been flagged)
+				for (auto &i : buffers_to_clear)
+					glClearBufferfv(GL_COLOR, i, colors);
+			}
+
+			if (clear_depth)
+				gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
+		}
+	}
+
+	// Unconditionally enable scissor test if it was disabled before
+	gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
+
+	update_draw_state();
+
+	if (g_cfg.video.debug_output)
+	{
+		m_program->validate();
+	}
+
+	rsx::method_registers.current_draw_clause.begin();
+	u32 subdraw = 0u;
+	do
+	{
+		emit_geometry(subdraw++);
+	}
+	while (rsx::method_registers.current_draw_clause.next());
+
+	m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
+
+	m_attrib_ring_buffer->notify();
+	m_index_ring_buffer->notify();
+	m_fragment_env_buffer->notify();
+	m_vertex_env_buffer->notify();
+	m_texture_parameters_buffer->notify();
+	m_vertex_layout_buffer->notify();
+	m_fragment_constants_buffer->notify();
+	m_transform_constants_buffer->notify();
+
+	m_frame_stats.textures_upload_time += m_profiler.duration();
+
+	rsx::thread::end();
+}
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index c785a531c3..389ca79437 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -1,12 +1,9 @@
 #include "stdafx.h"
+#include "../Overlays/overlay_shader_compile_notification.h"
+#include "../Overlays/Shaders/shader_loading_dialog_native.h"
 #include "GLGSRender.h"
 #include "GLCompute.h"
 #include "GLVertexProgram.h"
-#include "../Overlays/overlay_shader_compile_notification.h"
-#include "../Overlays/Shaders/shader_loading_dialog_native.h"
-#include "../rsx_methods.h"
-#include "../Common/BufferUtils.h"
-#include "../rsx_utils.h"
 
 #define DUMP_VERTEX_DATA 0
 
@@ -44,605 +41,6 @@ GLGSRender::GLGSRender() : GSRender()
 
 extern CellGcmContextData current_context;
 
-namespace
-{
-	GLenum comparison_op(rsx::comparison_function op)
-	{
-		switch (op)
-		{
-		case rsx::comparison_function::never: return GL_NEVER;
-		case rsx::comparison_function::less: return GL_LESS;
-		case rsx::comparison_function::equal: return GL_EQUAL;
-		case rsx::comparison_function::less_or_equal: return GL_LEQUAL;
-		case rsx::comparison_function::greater: return GL_GREATER;
-		case rsx::comparison_function::not_equal: return GL_NOTEQUAL;
-		case rsx::comparison_function::greater_or_equal: return GL_GEQUAL;
-		case rsx::comparison_function::always: return GL_ALWAYS;
-		}
-		fmt::throw_exception("Unsupported comparison op 0x%X" HERE, static_cast<u32>(op));
-	}
-
-	GLenum stencil_op(rsx::stencil_op op)
-	{
-		switch (op)
-		{
-		case rsx::stencil_op::invert: return GL_INVERT;
-		case rsx::stencil_op::keep: return GL_KEEP;
-		case rsx::stencil_op::zero: return GL_ZERO;
-		case rsx::stencil_op::replace: return GL_REPLACE;
-		case rsx::stencil_op::incr: return GL_INCR;
-		case rsx::stencil_op::decr: return GL_DECR;
-		case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP;
-		case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP;
-		}
-		fmt::throw_exception("Unsupported stencil op 0x%X" HERE, static_cast<u32>(op));
-	}
-
-	GLenum blend_equation(rsx::blend_equation op)
-	{
-		switch (op)
-		{
-		// Note: GL might support signed add natively; the signed variants are approximated for now
-		case rsx::blend_equation::add_signed:
-			rsx_log.trace("blend equation add_signed used. Emulating using FUNC_ADD");
-		case rsx::blend_equation::add: return GL_FUNC_ADD;
-		case rsx::blend_equation::min: return GL_MIN;
-		case rsx::blend_equation::max: return GL_MAX;
-		case rsx::blend_equation::substract: return GL_FUNC_SUBTRACT;
-		case rsx::blend_equation::reverse_substract_signed:
-			rsx_log.trace("blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
-		case rsx::blend_equation::reverse_substract: return GL_FUNC_REVERSE_SUBTRACT;
-		case rsx::blend_equation::reverse_add_signed:
-		default:
-			rsx_log.error("Blend equation 0x%X is unimplemented!", static_cast<u32>(op));
-			return GL_FUNC_ADD;
-		}
-	}
-
-	GLenum blend_factor(rsx::blend_factor op)
-	{
-		switch (op)
-		{
-		case rsx::blend_factor::zero: return GL_ZERO;
-		case rsx::blend_factor::one: return GL_ONE;
-		case rsx::blend_factor::src_color: return GL_SRC_COLOR;
-		case rsx::blend_factor::one_minus_src_color: return GL_ONE_MINUS_SRC_COLOR;
-		case rsx::blend_factor::dst_color: return GL_DST_COLOR;
-		case rsx::blend_factor::one_minus_dst_color: return GL_ONE_MINUS_DST_COLOR;
-		case rsx::blend_factor::src_alpha: return GL_SRC_ALPHA;
-		case rsx::blend_factor::one_minus_src_alpha: return GL_ONE_MINUS_SRC_ALPHA;
-		case rsx::blend_factor::dst_alpha: return GL_DST_ALPHA;
-		case rsx::blend_factor::one_minus_dst_alpha: return GL_ONE_MINUS_DST_ALPHA;
-		case rsx::blend_factor::src_alpha_saturate: return GL_SRC_ALPHA_SATURATE;
-		case rsx::blend_factor::constant_color: return GL_CONSTANT_COLOR;
-		case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR;
-		case rsx::blend_factor::constant_alpha: return GL_CONSTANT_ALPHA;
-		case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA;
-		}
-		fmt::throw_exception("Unsupported blend factor 0x%X" HERE, static_cast<u32>(op));
-	}
-
-	GLenum logic_op(rsx::logic_op op)
-	{
-		switch (op)
-		{
-		case rsx::logic_op::logic_clear: return GL_CLEAR;
-		case rsx::logic_op::logic_and: return GL_AND;
-		case rsx::logic_op::logic_and_reverse: return GL_AND_REVERSE;
-		case rsx::logic_op::logic_copy: return GL_COPY;
-		case rsx::logic_op::logic_and_inverted: return GL_AND_INVERTED;
-		case rsx::logic_op::logic_noop: return GL_NOOP;
-		case rsx::logic_op::logic_xor: return GL_XOR;
-		case rsx::logic_op::logic_or: return GL_OR;
-		case rsx::logic_op::logic_nor: return GL_NOR;
-		case rsx::logic_op::logic_equiv: return GL_EQUIV;
-		case rsx::logic_op::logic_invert: return GL_INVERT;
-		case rsx::logic_op::logic_or_reverse: return GL_OR_REVERSE;
-		case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED;
-		case rsx::logic_op::logic_or_inverted: return GL_OR_INVERTED;
-		case rsx::logic_op::logic_nand: return GL_NAND;
-		case rsx::logic_op::logic_set: return GL_SET;
-		}
-		fmt::throw_exception("Unsupported logic op 0x%X" HERE, static_cast<u32>(op));
-	}
-
-	GLenum front_face(rsx::front_face op)
-	{
-		//NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left
-		//shader_window_origin register does not affect this
-		//verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom)
-		//correctness of face winding checked using stencil test (GOW collection shadows)
-		switch (op)
-		{
-		case rsx::front_face::cw: return GL_CCW;
-		case rsx::front_face::ccw: return GL_CW;
-		}
-		fmt::throw_exception("Unsupported front face 0x%X" HERE, static_cast<u32>(op));
-	}
-
-	GLenum cull_face(rsx::cull_face op)
-	{
-		switch (op)
-		{
-		case rsx::cull_face::front: return GL_FRONT;
-		case rsx::cull_face::back: return GL_BACK;
-		case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK;
-		}
-		fmt::throw_exception("Unsupported cull face 0x%X" HERE, static_cast<u32>(op));
-	}
-}
-
-void GLGSRender::begin()
-{
-	rsx::thread::begin();
-
-	if (skip_current_frame || cond_render_ctrl.disable_rendering())
-		return;
-
-	init_buffers(rsx::framebuffer_creation_context::context_draw);
-}
-
-void GLGSRender::end()
-{
-	m_profiler.start();
-
-	if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering())
-	{
-		execute_nop_draw();
-		rsx::thread::end();
-		return;
-	}
-
-	m_frame_stats.setup_time += m_profiler.duration();
-
-	const auto do_heap_cleanup = [this]()
-	{
-		if (manually_flush_ring_buffers)
-		{
-			m_attrib_ring_buffer->unmap();
-			m_index_ring_buffer->unmap();
-		}
-		else
-		{
-			//DMA push; not needed with MAP_COHERENT
-			//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
-		}
-	};
-
-	gl::command_context cmd{ gl_state };
-	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
-
-	// Handle special memory barrier for ARGB8->D24S8 in an active DSV
-	if (ds && ds->old_contents.size() == 1 &&
-		ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
-	{
-		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
-
-		// TODO: Stencil transfer
-		gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
-		ds->old_contents[0].init_transfer(ds);
-
-		m_depth_converter.run(ds->old_contents[0].src_rect(),
-			ds->old_contents[0].dst_rect(),
-			ds->old_contents[0].source, ds);
-
-		ds->on_write();
-	}
-
-	// Load textures
-	{
-		m_profiler.start();
-
-		std::lock_guard lock(m_sampler_mutex);
-		bool update_framebuffer_sourced = false;
-
-		if (surface_store_tag != m_rtts.cache_tag)
-		{
-			update_framebuffer_sourced = true;
-			surface_store_tag = m_rtts.cache_tag;
-		}
-
-		for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
-		{
-			if (!fs_sampler_state[i])
-				fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
-
-			if (m_samplers_dirty || m_textures_dirty[i] ||
-				(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
-			{
-				auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
-
-				if (rsx::method_registers.fragment_textures[i].enabled())
-				{
-					*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);
-
-					if (m_textures_dirty[i])
-						m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
-				}
-				else
-				{
-					*sampler_state = {};
-				}
-
-				m_textures_dirty[i] = false;
-			}
-		}
-
-		for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
-		{
-			if (!vs_sampler_state[i])
-				vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
-
-			if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
-				(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
-			{
-				auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
-
-				if (rsx::method_registers.vertex_textures[i].enabled())
-				{
-					*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);
-
-					if (m_vertex_textures_dirty[i])
-						m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
-				}
-				else
-					*sampler_state = {};
-
-				m_vertex_textures_dirty[i] = false;
-			}
-		}
-
-		m_samplers_dirty.store(false);
-
-		m_frame_stats.textures_upload_time += m_profiler.duration();
-	}
-
-	// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
-	// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
-	if (!load_program())
-	{
-		// Program is not ready, skip drawing this
-		std::this_thread::yield();
-		execute_nop_draw();
-		// m_rtts.on_write(); - breaks games for obvious reasons
-		rsx::thread::end();
-		return;
-	}
-
-	// Load program execution environment
-	load_program_env();
-
-	m_frame_stats.setup_time += m_profiler.duration();
-
-	//Bind textures and resolve external copy operations
-	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
-	{
-		if (current_fp_metadata.referenced_textures_mask & (1 << i))
-		{
-			_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
-
-			gl::texture_view* view = nullptr;
-			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
-
-			if (rsx::method_registers.fragment_textures[i].enabled() &&
-				sampler_state->validate())
-			{
-				if (view = sampler_state->image_handle; !view) [[unlikely]]
-				{
-					view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
-				}
-			}
-
-			if (view) [[likely]]
-			{
-				view->bind();
-
-				if (current_fragment_program.redirected_textures & (1 << i))
-				{
-					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
-
-					auto root_texture = static_cast<gl::viewable_image*>(view->image());
-					auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
-					stencil_view->bind();
-				}
-			}
-			else
-			{
-				auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
-				glBindTexture(target, m_null_textures[target]->id());
-
-				if (current_fragment_program.redirected_textures & (1 << i))
-				{
-					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
-					glBindTexture(target, m_null_textures[target]->id());
-				}
-			}
-		}
-	}
-
-	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
-	{
-		if (current_vp_metadata.referenced_textures_mask & (1 << i))
-		{
-			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
-			_SelectTexture(GL_VERTEX_TEXTURES_START + i);
-
-			if (rsx::method_registers.vertex_textures[i].enabled() &&
-				sampler_state->validate())
-			{
-				if (sampler_state->image_handle) [[likely]]
-				{
-					sampler_state->image_handle->bind();
-				}
-				else
-				{
-					m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
-				}
-			}
-			else
-			{
-				glBindTexture(GL_TEXTURE_2D, GL_NONE);
-			}
-		}
-	}
-
-	m_gl_texture_cache.release_uncached_temporary_subresources();
-
-	m_frame_stats.textures_upload_time += m_profiler.duration();
-
-	// Optionally do memory synchronization if the texture stage has not yet triggered this
-	if (true)//g_cfg.video.strict_rendering_mode)
-	{
-		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
-
-		if (ds) ds->write_barrier(cmd);
-
-		for (auto &rtt : m_rtts.m_bound_render_targets)
-		{
-			if (auto surface = std::get<1>(rtt))
-			{
-				surface->write_barrier(cmd);
-			}
-		}
-	}
-	else
-	{
-		rsx::simple_array<int> buffers_to_clear;
-		bool clear_all_color = true;
-		bool clear_depth = false;
-
-		for (int index = 0; index < 4; index++)
-		{
-			if (m_rtts.m_bound_render_targets[index].first)
-			{
-				if (!m_rtts.m_bound_render_targets[index].second->dirty())
-					clear_all_color = false;
-				else
-					buffers_to_clear.push_back(index);
-			}
-		}
-
-		if (ds && ds->dirty())
-		{
-			clear_depth = true;
-		}
-
-		if (clear_depth || !buffers_to_clear.empty())
-		{
-			gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
-			GLenum mask = 0;
-
-			if (clear_depth)
-			{
-				gl_state.depth_mask(GL_TRUE);
-				gl_state.clear_depth(1.f);
-				gl_state.clear_stencil(255);
-				mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
-			}
-
-			if (clear_all_color)
-				mask |= GL_COLOR_BUFFER_BIT;
-
-			glClear(mask);
-
-			if (!buffers_to_clear.empty() && !clear_all_color)
-			{
-				GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
-				//It is impossible for the render target to be type A or B here (clear all would have been flagged)
-				for (auto &i : buffers_to_clear)
-					glClearBufferfv(GL_COLOR, i, colors);
-			}
-
-			if (clear_depth)
-				gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
-		}
-	}
-
-	// Unconditionally enable scissor test if it was disabled before
-	gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
-
-	update_draw_state();
-
-	if (g_cfg.video.debug_output)
-	{
-		m_program->validate();
-	}
-
-	const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
-	rsx::method_registers.current_draw_clause.begin();
-	int subdraw = 0;
-	do
-	{
-		if (!subdraw)
-		{
-			analyse_inputs_interleaved(m_vertex_layout);
-			if (!m_vertex_layout.validate())
-			{
-				// Execute remaining pipeline barriers with NOP draw
-				do
-				{
-					rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
-				}
-				while (rsx::method_registers.current_draw_clause.next());
-
-				rsx::method_registers.current_draw_clause.end();
-				break;
-			}
-		}
-		else
-		{
-			if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
-			{
-				// Rebase vertex bases instead of re-analysing the vertex layout
-				for (auto &info : m_vertex_layout.interleaved_blocks)
-				{
-					const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
-					info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
-				}
-			}
-		}
-
-		++subdraw;
-
-		if (manually_flush_ring_buffers)
-		{
-			//Use approximations to reserve space. This path is mostly for debug purposes anyway
-			u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
-			u32 approx_working_buffer_size = approx_vertex_count * 256;
-
-			//Allocate 256K heap if we have no approximation at this time (inlined array)
-			m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
-			m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
-		}
-
-		//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
-		auto upload_info = set_vertex_buffer();
-		do_heap_cleanup();
-
-		if (upload_info.vertex_draw_count == 0)
-		{
-			// Malformed vertex setup; abort
-			continue;
-		}
-
-		update_vertex_env(upload_info);
-
-		if (!upload_info.index_info)
-		{
-			if (rsx::method_registers.current_draw_clause.is_single_draw())
-			{
-				glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
-			}
-			else
-			{
-				const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
-				const auto draw_count = subranges.size();
-				const auto driver_caps = gl::get_driver_caps();
-				bool use_draw_arrays_fallback = false;
-
-				m_scratch_buffer.resize(draw_count * 24);
-				GLint* firsts = reinterpret_cast<GLint*>(m_scratch_buffer.data());
-				GLsizei* counts = (firsts + draw_count);
-				const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
-
-				u32 first = 0;
-				u32 dst_index = 0;
-				for (const auto &range : subranges)
-				{
-					firsts[dst_index] = first;
-					counts[dst_index] = range.count;
-					offsets[dst_index++] = reinterpret_cast<const GLvoid*>(u64{first << 2});
-
-					if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
-					{
-						//Unlikely, but added here in case the identity buffer is not large enough somehow
-						use_draw_arrays_fallback = true;
-						break;
-					}
-
-					first += range.count;
-				}
-
-				if (use_draw_arrays_fallback)
-				{
-					//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
-					for (u32 n = 0; n < draw_count; ++n)
-					{
-						glDrawArrays(draw_mode, firsts[n], counts[n]);
-					}
-				}
-				else if (driver_caps.vendor_AMD)
-				{
-					//Use identity index buffer to fix broken vertexID on AMD
-					m_identity_index_buffer->bind();
-					glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, static_cast<GLsizei>(draw_count));
-				}
-				else
-				{
-					//Normal render
-					glMultiDrawArrays(draw_mode, firsts, counts, static_cast<GLsizei>(draw_count));
-				}
-			}
-		}
-		else
-		{
-			const GLenum index_type = std::get<0>(*upload_info.index_info);
-			const u32 index_offset = std::get<1>(*upload_info.index_info);
-			const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
-
-			if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
-			{
-				glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
-			}
-
-			m_index_ring_buffer->bind();
-
-			if (rsx::method_registers.current_draw_clause.is_single_draw())
-			{
-				glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
-			}
-			else
-			{
-				const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
-				const auto draw_count = subranges.size();
-				const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
-				uintptr_t index_ptr = index_offset;
-				m_scratch_buffer.resize(draw_count * 16);
-
-				GLsizei *counts = reinterpret_cast<GLsizei*>(m_scratch_buffer.data());
-				const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
-				int dst_index = 0;
-
-				for (const auto &range : subranges)
-				{
-					const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
-					counts[dst_index] = index_size;
-					offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr);
-
-					index_ptr += (index_size << type_scale);
-				}
-
-				glMultiDrawElements(draw_mode, counts, index_type, offsets, static_cast<GLsizei>(draw_count));
-			}
-		}
-	} while (rsx::method_registers.current_draw_clause.next());
-
-	m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
-
-	m_attrib_ring_buffer->notify();
-	m_index_ring_buffer->notify();
-	m_fragment_env_buffer->notify();
-	m_vertex_env_buffer->notify();
-	m_texture_parameters_buffer->notify();
-	m_vertex_layout_buffer->notify();
-	m_fragment_constants_buffer->notify();
-	m_transform_constants_buffer->notify();
-
-	m_frame_stats.textures_upload_time += m_profiler.duration();
-
-	rsx::thread::end();
-}
-
 void GLGSRender::set_viewport()
 {
 	// NOTE: scale offset matrix already contains the viewport transformation
@@ -1352,138 +750,6 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
 	}
 }
 
-void GLGSRender::update_draw_state()
-{
-	m_profiler.start();
-
-	for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
-	{
-		bool color_mask_b = rsx::method_registers.color_mask_b(index);
-		bool color_mask_g = rsx::method_registers.color_mask_g(index);
-		bool color_mask_r = rsx::method_registers.color_mask_r(index);
-		bool color_mask_a = rsx::method_registers.color_mask_a(index);
-
-		if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8)
-		{
-			//Map GB components onto RG
-			rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
-		}
-
-		gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
-	}
-
-	gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
-	gl_state.stencil_mask(rsx::method_registers.stencil_mask());
-
-	gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP);
-
-	if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
-	{
-		gl_state.depth_func(comparison_op(rsx::method_registers.depth_func()));
-	}
-
-	if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
-	{
-		gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
-	}
-
-	gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);
-
-	if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
-	{
-		glStencilFunc(comparison_op(rsx::method_registers.stencil_func()),
-			rsx::method_registers.stencil_func_ref(),
-			rsx::method_registers.stencil_func_mask());
-
-		glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()),
-			stencil_op(rsx::method_registers.stencil_op_zpass()));
-
-		if (rsx::method_registers.two_sided_stencil_test_enabled())
-		{
-			glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());
-
-			glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()),
-				rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());
-
-			glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()),
-				stencil_op(rsx::method_registers.back_stencil_op_zfail()), stencil_op(rsx::method_registers.back_stencil_op_zpass()));
-		}
-	}
-
-	bool mrt_blend_enabled[] =
-	{
-		rsx::method_registers.blend_enabled(),
-		rsx::method_registers.blend_enabled_surface_1(),
-		rsx::method_registers.blend_enabled_surface_2(),
-		rsx::method_registers.blend_enabled_surface_3()
-	};
-
-	if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
-	{
-		glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
-			blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
-			blend_factor(rsx::method_registers.blend_func_sfactor_a()),
-			blend_factor(rsx::method_registers.blend_func_dfactor_a()));
-
-		auto blend_colors = rsx::get_constant_blend_colors();
-		glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);
-
-		glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()),
-			blend_equation(rsx::method_registers.blend_equation_a()));
-	}
-
-	gl_state.enablei(mrt_blend_enabled[0], GL_BLEND, 0);
-	gl_state.enablei(mrt_blend_enabled[1], GL_BLEND, 1);
-	gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
-	gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);
-
-	if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
-	{
-		gl_state.logic_op(logic_op(rsx::method_registers.logic_operation()));
-	}
-
-	gl_state.line_width(rsx::method_registers.line_width());
-	gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);
-
-	gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
-	gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
-	gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
-
-	//offset_bias is the constant factor, multiplied by the implementation factor R
-	//offset_scale is the slope factor, multiplied by the triangle slope factor M
-	gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
-
-	if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
-	{
-		gl_state.cull_face(cull_face(rsx::method_registers.cull_face_mode()));
-	}
-
-	gl_state.front_face(front_face(rsx::method_registers.front_face_mode()));
-
-	// Sample control
-	// TODO: MinSampleShading
-	//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
-	//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
-	//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);
-
-	//TODO
-	//NV4097_SET_ANISO_SPREAD
-	//NV4097_SET_SPECULAR_ENABLE
-	//NV4097_SET_TWO_SIDE_LIGHT_EN
-	//NV4097_SET_FLAT_SHADE_OP
-	//NV4097_SET_EDGE_FLAG
-
-	//NV4097_SET_COLOR_KEY_COLOR
-	//NV4097_SET_SHADER_CONTROL
-	//NV4097_SET_ZMIN_MAX_CONTROL
-	//NV4097_SET_ANTI_ALIASING_CONTROL
-	//NV4097_SET_CLIP_ID_TEST_ENABLE
-
-	m_frame_stats.setup_time += m_profiler.duration();
-}
-
 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
 	const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h
index 0d626debc7..5df5236919 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -75,6 +75,7 @@ private:
 	gl::sampler_state m_vs_sampler_states[rsx::limits::vertex_textures_count]; // Vertex textures
 
 	gl::glsl::program *m_program;
+	gl::glsl::program m_shader_interpreter;
 
 	gl_render_targets m_rtts;
@@ -154,6 +155,9 @@ private:
 	void update_draw_state();
 
+	void load_texture_env();
+	void bind_texture_env();
+
 	gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig);
 
 public:
@@ -174,6 +178,7 @@ protected:
 	void clear_surface(u32 arg) override;
 	void begin() override;
 	void end() override;
+	void emit_geometry(u32 sub_index) override;
 	void on_init_thread() override;
 	void on_exit() override;
diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
new file mode 100644
index 0000000000..564412cbbc
--- /dev/null
+++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
@@ -0,0 +1,7 @@
+#include "stdafx.h"
+#include "GLShaderInterpreter.h"
+
+namespace gl
+{
+
+}
diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
new file mode 100644
index 0000000000..a53dab1f07
--- /dev/null
+++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "GLGSRender.h"
+
+namespace gl
+{
+	class shader_interpreter : glsl::program
+	{
+		glsl::shader vs;
+		glsl::shader fs;
+
+	public:
+		void create();
+		void destroy();
+	};
+}
diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp
new file mode 100644
index 0000000000..e1d82ee4e2
--- /dev/null
+++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@@ -0,0 +1,930 @@
+#include "stdafx.h"
+#include "VKGSRender.h"
+#include "../Common/BufferUtils.h"
+
+namespace vk
+{
+	VkImageViewType get_view_type(rsx::texture_dimension_extended type)
+	{
+		switch (type)
+		{
+		case rsx::texture_dimension_extended::texture_dimension_1d:
+			return VK_IMAGE_VIEW_TYPE_1D;
+		case rsx::texture_dimension_extended::texture_dimension_2d:
+			return VK_IMAGE_VIEW_TYPE_2D;
+		case rsx::texture_dimension_extended::texture_dimension_cubemap:
+			return VK_IMAGE_VIEW_TYPE_CUBE;
+		case rsx::texture_dimension_extended::texture_dimension_3d:
+			return VK_IMAGE_VIEW_TYPE_3D;
+		default: ASSUME(0);
+		}
+	}
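+	// reverse_direction swaps the direction of the inequality operators; the emulated shadow-compare path in load_texture_env relies on this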
+	VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false)
+	{
+		switch (op)
+		{
+		case rsx::comparison_function::never: return VK_COMPARE_OP_NEVER;
+		case rsx::comparison_function::greater: return reverse_direction ? VK_COMPARE_OP_LESS : VK_COMPARE_OP_GREATER;
+		case rsx::comparison_function::less: return reverse_direction ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS;
+		case rsx::comparison_function::less_or_equal: return reverse_direction ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_LESS_OR_EQUAL;
+		case rsx::comparison_function::greater_or_equal: return reverse_direction ? VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_GREATER_OR_EQUAL;
+		case rsx::comparison_function::equal: return VK_COMPARE_OP_EQUAL;
+		case rsx::comparison_function::not_equal: return VK_COMPARE_OP_NOT_EQUAL;
+		case rsx::comparison_function::always: return VK_COMPARE_OP_ALWAYS;
+		default:
+			fmt::throw_exception("Unknown compare op: 0x%x" HERE, static_cast<u32>(op));
+		}
+	}
+}
+
+void VKGSRender::begin_render_pass()
+{
+	vk::begin_renderpass(
+		*m_current_command_buffer,
+		get_render_pass(),
+		m_draw_fbo->value,
+		{ positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} });
+}
+
+void VKGSRender::close_render_pass()
+{
+	vk::end_renderpass(*m_current_command_buffer);
+}
+
+VkRenderPass VKGSRender::get_render_pass()
+{
+	if (!m_cached_renderpass)
+	{
+		m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key);
+	}
+
+	return m_cached_renderpass;
+}
+
+void VKGSRender::update_draw_state()
+{
+	m_profiler.start();
+
+	float actual_line_width = rsx::method_registers.line_width();
+	vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
+
+	if (rsx::method_registers.poly_offset_fill_enabled())
+	{
+		//offset_bias is the constant factor, multiplied by the implementation factor R
+		//offset_scale is the slope factor, multiplied by the triangle slope factor M
+		vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
+	}
+	else
+	{
+		//Zero bias value - disables depth bias
+		vkCmdSetDepthBias(*m_current_command_buffer, 0.f, 0.f, 0.f);
+	}
+
+	//Update dynamic state
+	if (rsx::method_registers.blend_enabled())
+	{
+		//Update blend constants
+		auto blend_colors = rsx::get_constant_blend_colors();
+		vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data());
+	}
+
+	if (rsx::method_registers.stencil_test_enabled())
+	{
+		const bool two_sided_stencil = rsx::method_registers.two_sided_stencil_test_enabled();
+		VkStencilFaceFlags face_flag = (two_sided_stencil) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK;
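+		// With two-sided stencil, only the front face is set here and the back face is programmed separately below; otherwise one call covers both faces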
+		vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
+		vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
+		vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref());
+
+		if (two_sided_stencil)
+		{
+			vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask());
+			vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask());
+			vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref());
+		}
+	}
+
+	if (m_device->get_depth_bounds_support())
+	{
+		if (rsx::method_registers.depth_bounds_test_enabled())
+		{
+			//Update depth bounds min/max
+			vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
+		}
+		else
+		{
+			vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f);
+		}
+	}
+
+	bind_viewport();
+
+	//TODO: Set up other render-state parameters into the program pipeline
+
+	m_frame_stats.setup_time += m_profiler.duration();
+}
+
+void VKGSRender::load_texture_env()
+{
+	//Load textures
+	bool update_framebuffer_sourced = false;
+	bool check_for_cyclic_refs = false;
+
+	std::lock_guard lock(m_sampler_mutex);
+
+	if (surface_store_tag != m_rtts.cache_tag) [[unlikely]]
+	{
+		update_framebuffer_sourced = true;
+		surface_store_tag = m_rtts.cache_tag;
+	}
+
+	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
+	{
+		if (!fs_sampler_state[i])
+			fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
+
+		if (m_samplers_dirty || m_textures_dirty[i] ||
+			(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
+		{
+			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
+
+			if (rsx::method_registers.fragment_textures[i].enabled())
+			{
+				check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
+
+				if (sampler_state->is_cyclic_reference)
+				{
+					check_for_cyclic_refs |= true;
+				}
+
+				bool replace = !fs_sampler_handles[i];
+				VkFilter mag_filter;
+				vk::minification_filter min_filter;
+				f32 min_lod = 0.f, max_lod = 0.f;
+				f32 lod_bias = 0.f;
+
+				const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
+				VkBool32 compare_enabled = VK_FALSE;
+				VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER;
+
+				if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT)
+				{
+					if (m_device->get_formats_support().d24_unorm_s8)
+					{
+						// NOTE:
+						// The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3
+						// In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison
+						// Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage
+						// Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results
+
+						// NOTE2:
+						// To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available
+
+						compare_enabled = VK_TRUE;
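+						// Hardware compare is acceptable here because D24S8 clamps the projected Z to [0, 1], matching PS3 behavior (see NOTE above)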
+
+				const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0;
+				const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
+				const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
+				const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
+				const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
+				const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());
+
+				// Check if non-point filtering can even be used on this format
+				bool can_sample_linear;
+				if (sampler_state->format_class == rsx::format_type::color) [[likely]]
+				{
+					// Most PS3-like formats can be linearly filtered without problem
+					can_sample_linear = true;
+				}
+				else
+				{
+					// Not all GPUs support linear filtering of depth formats
+					const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() :
+						vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format);
+
+					can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+				}
+
+				const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count();
+				min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter());
+
+				if (can_sample_linear)
+				{
+					mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
+				}
+				else
+				{
+					mag_filter = VK_FILTER_NEAREST;
+					min_filter.filter = VK_FILTER_NEAREST;
+				}
+
+				if (min_filter.sample_mipmaps && mipmap_count > 1)
+				{
+					f32 actual_mipmaps;
+					if (sampler_state->upload_context == rsx::texture_upload_context::shader_read)
+					{
+						actual_mipmaps = static_cast<f32>(mipmap_count);
+					}
+					else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather)
+					{
+						// Clamp min and max lod
+						actual_mipmaps = static_cast<f32>(sampler_state->external_subresource_desc.sections_to_copy.size());
+					}
+					else
+					{
+						actual_mipmaps = 1.f;
+					}
+
+					if (actual_mipmaps > 1.f)
+					{
+						min_lod = rsx::method_registers.fragment_textures[i].min_lod();
+						max_lod = rsx::method_registers.fragment_textures[i].max_lod();
+						lod_bias = rsx::method_registers.fragment_textures[i].bias();
+
+						min_lod = std::min(min_lod, actual_mipmaps - 1.f);
+						max_lod = std::min(max_lod, actual_mipmaps - 1.f);
+
+						if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST)
+						{
+							// Round to nearest 0.5 to work around some broken games
+							// Unlike OpenGL, sampler parameters cannot be dynamically changed on Vulkan, leading to many permutations
+							lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f;
+						}
+					}
+					else
+					{
+						min_lod = max_lod = lod_bias = 0.f;
+						min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+					}
+				}
+
+				if (fs_sampler_handles[i] && m_textures_dirty[i])
+				{
+					if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
+						min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode))
+					{
+						replace = true;
+					}
+				}
+
+				if (replace)
+				{
+					fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
+						min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode);
+				}
+			}
+			else
+			{
+				*sampler_state = {};
+			}
+
+			m_textures_dirty[i] = false;
+		}
+	}
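Because VkSampler objects are immutable, the matches()/find_sampler() pair above amounts to a deduplicating cache keyed by the whole parameter tuple; every distinct LOD bias or wrap mode yields another permanent sampler, which is why the bias is quantized for nearest mipmap mode. A simplified sketch of such a cache; the class and its hashing scheme are hypothetical, not the actual vk::resource_manager:

#include <vulkan/vulkan.h>
#include <cstddef>
#include <cstdint>
#include <unordered_map>

// Deduplicates immutable samplers by hashing the create-info bytes (FNV-1a).
// Assumes the create info is fully zero-initialized and pNext remains null so
// padding and pointer fields hash consistently.
class sampler_cache
{
	std::unordered_map<std::uint64_t, VkSampler> m_storage;

public:
	VkSampler get(VkDevice dev, const VkSamplerCreateInfo& info)
	{
		std::uint64_t key = 0xcbf29ce484222325ull;
		const auto* bytes = reinterpret_cast<const unsigned char*>(&info);
		for (std::size_t i = 0; i < sizeof(info); ++i)
		{
			key = (key ^ bytes[i]) * 0x100000001b3ull;
		}

		if (const auto found = m_storage.find(key); found != m_storage.end())
		{
			return found->second;
		}

		VkSampler sampler = VK_NULL_HANDLE;
		vkCreateSampler(dev, &info, nullptr, &sampler);
		return m_storage[key] = sampler;
	}
};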
+
+	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
+	{
+		if (!vs_sampler_state[i])
+			vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
+
+		if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
+			(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
+		{
+			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
+
+			if (rsx::method_registers.vertex_textures[i].enabled())
+			{
+				check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
+
+				if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache)
+				{
+					check_for_cyclic_refs |= true;
+				}
+
+				bool replace = !vs_sampler_handles[i];
+				const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
+				const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod();
+				const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod();
+				const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color());
+
+				if (vs_sampler_handles[i])
+				{
+					if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
+						unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color))
+					{
+						replace = true;
+					}
+				}
+
+				if (replace)
+				{
+					vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(
+						*m_device,
+						VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
+						unnormalized_coords,
+						0.f, 1.f, min_lod, max_lod,
+						VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color);
+				}
+			}
+			else
+				*sampler_state = {};
+
+			m_vertex_textures_dirty[i] = false;
+		}
+	}
+
+	m_samplers_dirty.store(false);
+
+	if (check_for_cyclic_refs)
+	{
+		// Regenerate renderpass key
+		if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key);
+			key != m_current_renderpass_key)
+		{
+			m_current_renderpass_key = key;
+			m_cached_renderpass = VK_NULL_HANDLE;
+		}
+	}
+}
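The cyclic-reference handling that closes load_texture_env() is key-based cache invalidation: sampling a texture that is also a bound attachment forces different attachment layouts, which changes the render pass compatibility key, so the cached VkRenderPass is dropped and lazily re-resolved by get_render_pass(). The pattern reduced to a sketch, with illustrative type and member names:

#include <vulkan/vulkan.h>
#include <cstdint>

// The cached handle is discarded only when the key actually changes, keeping
// the common path free of hash lookups and allocations.
struct cached_renderpass
{
	std::uint64_t key = 0;
	VkRenderPass handle = VK_NULL_HANDLE;

	void on_key_changed(std::uint64_t current_key)
	{
		if (current_key != key)
		{
			key = current_key;
			handle = VK_NULL_HANDLE; // re-resolved on next use
		}
	}
};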
+
+void VKGSRender::bind_texture_env()
+{
+	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
+	{
+		if (current_fp_metadata.referenced_textures_mask & (1 << i))
+		{
+			vk::image_view* view = nullptr;
+			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
+
+			if (rsx::method_registers.fragment_textures[i].enabled() &&
+				sampler_state->validate())
+			{
+				if (view = sampler_state->image_handle; !view)
+				{
+					//Requires update, copy subresource
+					view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
+				}
+				else
+				{
+					switch (auto raw = view->image(); raw->current_layout)
+					{
+					default:
+					//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+						break;
+					case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
+						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+						break;
+					case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
+						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+						break;
+					case VK_IMAGE_LAYOUT_GENERAL:
+						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
+						if (!sampler_state->is_cyclic_reference)
+						{
+							// This was used in a cyclic ref before, but is missing a barrier
+							// No need for a full stall, use a custom barrier instead
+							VkPipelineStageFlags src_stage;
+							VkAccessFlags src_access;
+							if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
+							{
+								src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+								src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+							}
+							else
+							{
+								src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+								src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+							}
+
+							vk::insert_image_memory_barrier(
+								*m_current_command_buffer,
+								raw->value,
+								VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+								src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+								src_access, VK_ACCESS_SHADER_READ_BIT,
+								{ raw->aspect(), 0, 1, 0, 1 });
+
+							raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+						}
+						break;
+					case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+					case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
+						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+						break;
+					}
+				}
+			}
+
+			if (view) [[likely]]
+			{
+				m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout },
+					i,
+					::glsl::program_domain::glsl_fragment_program,
+					m_current_frame->descriptor_set);
+
+				if (current_fragment_program.redirected_textures & (1 << i))
+				{
+					// Stencil mirror required
+					auto root_image = static_cast<vk::viewable_image*>(view->image());
+					auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
+
+					if (!m_stencil_mirror_sampler)
+					{
+						m_stencil_mirror_sampler = std::make_unique<vk::sampler>(*m_device,
+							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+							VK_FALSE, 0.f, 1.f, 0.f, 0.f,
+							VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,
+							VK_BORDER_COLOR_INT_OPAQUE_BLACK);
+					}
+
+					m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout },
+						i,
+						::glsl::program_domain::glsl_fragment_program,
+						m_current_frame->descriptor_set,
+						true);
+				}
+			}
+			else
+			{
+				const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
+				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
+					i,
+					::glsl::program_domain::glsl_fragment_program,
+					m_current_frame->descriptor_set);
+
+				if (current_fragment_program.redirected_textures & (1 << i))
+				{
+					m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
+						i,
+						::glsl::program_domain::glsl_fragment_program,
+						m_current_frame->descriptor_set,
+						true);
+				}
+			}
+		}
+	}
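The VK_IMAGE_LAYOUT_GENERAL case above avoids a full pipeline stall by synchronizing only prior attachment writes against the upcoming shader read of a single subresource. Expanded into raw Vulkan, a helper of that shape could look as follows (a sketch of the concept, not the actual vk::insert_image_memory_barrier implementation):

#include <vulkan/vulkan.h>

// Transition one subresource from GENERAL to SHADER_READ_ONLY_OPTIMAL,
// ordering attachment writes before fragment-shader reads only.
void attachment_to_shader_read(VkCommandBuffer cmd, VkImage image, VkImageAspectFlags aspect)
{
	const bool color = (aspect == VK_IMAGE_ASPECT_COLOR_BIT);

	VkImageMemoryBarrier barrier{};
	barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
	barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.image = image;
	barrier.subresourceRange = { aspect, 0, 1, 0, 1 };
	barrier.srcAccessMask = color ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
	                              : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

	const VkPipelineStageFlags src_stage = color
		? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
		: VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

	vkCmdPipelineBarrier(cmd, src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		0, 0, nullptr, 0, nullptr, 1, &barrier);
}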
+
+	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
+	{
+		if (current_vp_metadata.referenced_textures_mask & (1 << i))
+		{
+			if (!rsx::method_registers.vertex_textures[i].enabled())
+			{
+				const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
+				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
+					i,
+					::glsl::program_domain::glsl_vertex_program,
+					m_current_frame->descriptor_set);
+
+				continue;
+			}
+
+			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
+			auto image_ptr = sampler_state->image_handle;
+
+			if (!image_ptr && sampler_state->validate())
+			{
+				image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
+				m_vertex_textures_dirty[i] = true;
+			}
+
+			if (!image_ptr)
+			{
+				rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i);
+				const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
+
+				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
+					i,
+					::glsl::program_domain::glsl_vertex_program,
+					m_current_frame->descriptor_set);
+
+				continue;
+			}
+
+			switch (auto raw = image_ptr->image(); raw->current_layout)
+			{
+			default:
+			//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+				break;
+			case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
+				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+				break;
+			case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
+				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+				break;
+			case VK_IMAGE_LAYOUT_GENERAL:
+				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
+				if (!sampler_state->is_cyclic_reference)
+				{
+					// Custom barrier, see similar block in FS stage
+					VkPipelineStageFlags src_stage;
+					VkAccessFlags src_access;
+					if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
+					{
+						src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+						src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+					}
+					else
+					{
+						src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+						src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+					}
+
+					vk::insert_image_memory_barrier(
+						*m_current_command_buffer,
+						raw->value,
+						VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+						src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+						src_access, VK_ACCESS_SHADER_READ_BIT,
+						{ raw->aspect(), 0, 1, 0, 1 });
+
+					raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+				}
+				break;
+			case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+			case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
+				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+				break;
+			}
+
+			m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout },
+				i,
+				::glsl::program_domain::glsl_vertex_program,
+				m_current_frame->descriptor_set);
+		}
+	}
+}
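The null_sampler()/null_image_view() fallbacks used throughout bind_texture_env() exist because core Vulkan, without an extension such as VK_EXT_robustness2 and its nullDescriptor feature, requires every statically used binding to hold a valid descriptor even when the game leaves the slot disabled, and the dummy view must match the dimensionality the shader was compiled against. A hypothetical mapping helper to illustrate the dimensionality requirement (the integer encoding is made up for the example):

#include <vulkan/vulkan.h>

// Chooses a dummy view type compatible with what the shader declares.
VkImageViewType dummy_view_type(int dimensions)
{
	switch (dimensions)
	{
	case 1: return VK_IMAGE_VIEW_TYPE_1D;
	case 3: return VK_IMAGE_VIEW_TYPE_3D;
	case 6: return VK_IMAGE_VIEW_TYPE_CUBE; // cubemap, six faces
	default: return VK_IMAGE_VIEW_TYPE_2D;
	}
}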
+
+void VKGSRender::emit_geometry(u32 sub_index)
+{
+	auto &draw_call = rsx::method_registers.current_draw_clause;
+	m_profiler.start();
+
+	if (sub_index == 0)
+	{
+		analyse_inputs_interleaved(m_vertex_layout);
+
+		if (!m_vertex_layout.validate())
+		{
+			// No vertex inputs enabled
+			// Execute remaining pipeline barriers with NOP draw
+			do
+			{
+				draw_call.execute_pipeline_dependencies();
+			}
+			while (draw_call.next());
+
+			draw_call.end();
+			return;
+		}
+	}
+	else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed)
+	{
+		// Rebase vertex bases instead of recomputing the entire vertex layout
+		for (auto &info : m_vertex_layout.interleaved_blocks)
+		{
+			const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
+			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
+		}
+	}
+
+	const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
+	const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
+
+	// Program data is dependent on vertex state
+	auto upload_info = upload_vertex_data();
+	if (!upload_info.vertex_draw_count)
+	{
+		// Malformed vertex setup; abort
+		return;
+	}
+
+	m_frame_stats.vertex_upload_time += m_profiler.duration();
+
+	auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
+	auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
+	bool update_descriptors = false;
+
+	const auto& binding_table = m_device->get_pipeline_binding_table();
+
+	if (sub_index == 0)
+	{
+		update_descriptors = true;
+
+		// Allocate stream layout memory for this batch
+		m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
+		m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);
+
+		if (vk::test_status_interrupt(vk::heap_changed))
+		{
+			if (m_vertex_layout_storage &&
+				m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
+			{
+				m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
+			}
+
+			vk::clear_status_interrupt(vk::heap_changed);
+		}
+	}
+	else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
+	{
+		// Need to update descriptors; make a copy for the next draw
+		VkDescriptorSet new_descriptor_set = allocate_descriptor_set();
+		std::vector<VkCopyDescriptorSet> copy_set(binding_table.total_descriptor_bindings);
+
+		for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n)
+		{
+			copy_set[n] =
+			{
+				VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET,   // sType
+				nullptr,                                 // pNext
+				m_current_frame->descriptor_set,         // srcSet
+				n,                                       // srcBinding
+				0u,                                      // srcArrayElement
+				new_descriptor_set,                      // dstSet
+				n,                                       // dstBinding
+				0u,                                      // dstArrayElement
+				1u                                       // descriptorCount
+			};
+		}
+
+		vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data());
+		m_current_frame->descriptor_set = new_descriptor_set;
+
+		update_descriptors = true;
+	}
+
+	// Update vertex fetch parameters
+	update_vertex_env(sub_index, upload_info);
+
+	verify(HERE), m_vertex_layout_storage;
+	if (update_descriptors)
+	{
+		m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set);
+		m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set);
+		m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set);
+	}
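The copy step above implements copy-on-write for descriptor sets: a set already referenced by a recorded draw must not be rewritten, so per-subdraw changes go into a freshly allocated set seeded from the previous one. Isolated into a free function it would look roughly like this (the flat, zero-based binding layout is an assumption):

#include <vulkan/vulkan.h>
#include <vector>

// Seed `dst` by copying every binding of `src`, one descriptor per binding.
VkDescriptorSet clone_descriptor_set(VkDevice dev, VkDescriptorSet src,
	VkDescriptorSet dst, uint32_t binding_count)
{
	std::vector<VkCopyDescriptorSet> copies(binding_count);
	for (uint32_t n = 0; n < binding_count; ++n)
	{
		copies[n] = { VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, nullptr,
		              src, n, 0u,   // source set / binding / array element
		              dst, n, 0u,   // destination set / binding / array element
		              1u };         // descriptorCount
	}

	vkUpdateDescriptorSets(dev, 0, nullptr, binding_count, copies.data());
	return dst;
}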
+
+	if (!m_current_subdraw_id++)
+	{
+		vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
+		update_draw_state();
+		begin_render_pass();
+
+		if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support())
+		{
+			// It is inconvenient that conditional rendering breaks other things like compute dispatch
+			// TODO: If this is heavy, refactor the resources into globals and add checks around compute dispatch
+			VkConditionalRenderingBeginInfoEXT info{};
+			info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
+			info.buffer = m_cond_render_buffer->value;
+
+			m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info);
+			m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render;
+		}
+	}
+
+	// Bind the new set of descriptors for use with this draw call
+	vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
+
+	m_frame_stats.setup_time += m_profiler.duration();
+
+	if (!upload_info.index_info)
+	{
+		if (draw_call.is_single_draw())
+		{
+			vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
+		}
+		else
+		{
+			u32 vertex_offset = 0;
+			const auto subranges = draw_call.get_subranges();
+			for (const auto &range : subranges)
+			{
+				vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0);
+				vertex_offset += range.count;
+			}
+		}
+	}
+	else
+	{
+		const VkIndexType index_type = std::get<1>(*upload_info.index_info);
+		const VkDeviceSize offset = std::get<0>(*upload_info.index_info);
+
+		vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
+
+		if (rsx::method_registers.current_draw_clause.is_single_draw())
+		{
+			const u32 index_count = upload_info.vertex_draw_count;
+			vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
+		}
+		else
+		{
+			u32 vertex_offset = 0;
+			const auto subranges = draw_call.get_subranges();
+			for (const auto &range : subranges)
+			{
+				const auto count = get_index_count(draw_call.primitive, range.count);
+				vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0);
+				vertex_offset += count;
+			}
+		}
+	}
+
+	m_frame_stats.draw_exec_time += m_profiler.duration();
+}
+
+void VKGSRender::begin()
+{
+	rsx::thread::begin();
+
+	if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering())
+		return;
+
+	init_buffers(rsx::framebuffer_creation_context::context_draw);
+}
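For context on the cond_render_ctrl block in emit_geometry() above: VK_EXT_conditional_rendering makes the GPU discard every draw recorded between the begin/end calls whenever the 32-bit predicate at the given buffer offset is zero. The bare extension usage, with the entry points passed in since they are device-level functions (fetched per device, as with m_device->cmdBeginConditionalRenderingEXT above):

#include <vulkan/vulkan.h>

// Record draws that execute only when *predicate != 0 at submission time.
void predicated_draws(VkCommandBuffer cmd, VkBuffer predicate,
	PFN_vkCmdBeginConditionalRenderingEXT begin_fn,
	PFN_vkCmdEndConditionalRenderingEXT end_fn)
{
	VkConditionalRenderingBeginInfoEXT info{};
	info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
	info.buffer = predicate;
	info.offset = 0; // location of the 32-bit predicate within the buffer

	begin_fn(cmd, &info);
	// ... vkCmdDraw* calls gated by the predicate ...
	end_fn(cmd);
}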
+
+void VKGSRender::end()
+{
+	if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering())
+	{
+		execute_nop_draw();
+		rsx::thread::end();
+		return;
+	}
+
+	// Check for frame resource status here because it is possible for an async flip to happen between begin/end
+	if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]]
+	{
+		check_present_status();
+
+		if (m_current_frame->swap_command_buffer) [[unlikely]]
+		{
+			// Borrow time by using the auxiliary context
+			m_aux_frame_context.grab_resources(*m_current_frame);
+			m_current_frame = &m_aux_frame_context;
+		}
+		else if (m_current_frame->used_descriptors)
+		{
+			m_current_frame->descriptor_pool.reset(0);
+			m_current_frame->used_descriptors = 0;
+		}
+
+		verify(HERE), !m_current_frame->swap_command_buffer;
+
+		m_current_frame->flags &= ~frame_context_state::dirty;
+	}
+
+	m_profiler.start();
+
+	// Check for data casts
+	// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
+	auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
+	if (ds && ds->old_contents.size() == 1 &&
+		ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM)
+	{
+		auto key = vk::get_renderpass_key(ds->info.format);
+		auto render_pass = vk::get_renderpass(*m_device, key);
+		verify("Unsupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE;
+
+		VkClearDepthStencilValue clear = { 1.f, 0xFF };
+		VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
+
+		// Initialize source
+		auto src = vk::as_rtt(ds->old_contents[0].source);
+		src->read_barrier(*m_current_command_buffer);
+
+		switch (src->current_layout)
+		{
+		case VK_IMAGE_LAYOUT_GENERAL:
+		case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+			break;
+		//case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+		default:
+			src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+			break;
+		}
+
+		// Clear explicitly before starting the inheritance transfer
+		const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL);
+		if (!preinitialized) ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+		vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
+		if (!preinitialized) ds->pop_layout(*m_current_command_buffer);
+
+		// TODO: Stencil transfer
+		ds->old_contents[0].init_transfer(ds);
+		m_depth_converter->run(*m_current_command_buffer,
+			ds->old_contents[0].src_rect(),
+			ds->old_contents[0].dst_rect(),
+			src->get_view(0xAAE4, rsx::default_remap_vector),
+			ds, render_pass);
+
+		// TODO: Flush management to avoid pass running out of ubo space (very unlikely)
+		ds->on_write();
+	}
+
+	load_texture_env();
+	m_frame_stats.textures_upload_time += m_profiler.duration();
+
+	if (!load_program())
+	{
+		// Program is not ready, skip drawing this
+		std::this_thread::yield();
+		execute_nop_draw();
+		// m_rtts.on_write(); - breaks games for obvious reasons
+		rsx::thread::end();
+		return;
+	}
+
+	// Allocate descriptor set
+	check_descriptors();
+	m_current_frame->descriptor_set = allocate_descriptor_set();
+
+	// Load program execution environment
+	load_program_env();
+	m_frame_stats.setup_time += m_profiler.duration();
+
+	bind_texture_env();
+	m_texture_cache.release_uncached_temporary_subresources();
+	m_frame_stats.textures_upload_time += m_profiler.duration();
+
+	if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
+	{
+		u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
+		if (occlusion_id == UINT32_MAX)
+		{
+			// Force flush
+			rsx_log.error("[Performance Warning] Out of free occlusion slots.
Forcing hard sync."); + ZCULL_control::sync(this); + + occlusion_id = m_occlusion_query_pool.find_free_slot(); + if (occlusion_id == UINT32_MAX) + { + //rsx_log.error("Occlusion pool overflow"); + if (m_current_task) m_current_task->result = 1; + } + } + + // Begin query + m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); + + auto &data = m_occlusion_map[m_active_query_info->driver_handle]; + data.indices.push_back(occlusion_id); + data.set_sync_command_buffer(m_current_command_buffer); + + m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; + m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query); + } + + bool primitive_emulated = false; + vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); + + // Apply write memory barriers + if (ds) ds->write_barrier(*m_current_command_buffer); + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (auto surface = std::get<1>(rtt)) + { + surface->write_barrier(*m_current_command_buffer); + } + } + + // Final heap check... + check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); + + u32 sub_index = 0; + m_current_subdraw_id = 0; + + rsx::method_registers.current_draw_clause.begin(); + do + { + emit_geometry(sub_index++); + } + while (rsx::method_registers.current_draw_clause.next()); + + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) + { + m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer); + m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render); + } + + m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); + + rsx::thread::end(); +} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d277b61bba..0d178db6e1 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,11 +1,7 @@ #include "stdafx.h" -#include "VKGSRender.h" #include "../Overlays/overlay_shader_compile_notification.h" #include "../Overlays/Shaders/shader_loading_dialog_native.h" -#include "../rsx_methods.h" -#include "../rsx_utils.h" -#include "../Common/BufferUtils.h" -#include "VKFormats.h" +#include "VKGSRender.h" #include "VKCommonDecompiler.h" #include "VKRenderPass.h" #include "VKResourceManager.h" @@ -42,22 +38,7 @@ namespace namespace vk { - VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false) - { - switch (op) - { - case rsx::comparison_function::never: return VK_COMPARE_OP_NEVER; - case rsx::comparison_function::greater: return reverse_direction ? VK_COMPARE_OP_LESS: VK_COMPARE_OP_GREATER; - case rsx::comparison_function::less: return reverse_direction ? VK_COMPARE_OP_GREATER: VK_COMPARE_OP_LESS; - case rsx::comparison_function::less_or_equal: return reverse_direction ? VK_COMPARE_OP_GREATER_OR_EQUAL: VK_COMPARE_OP_LESS_OR_EQUAL; - case rsx::comparison_function::greater_or_equal: return reverse_direction ? 
VK_COMPARE_OP_LESS_OR_EQUAL: VK_COMPARE_OP_GREATER_OR_EQUAL; - case rsx::comparison_function::equal: return VK_COMPARE_OP_EQUAL; - case rsx::comparison_function::not_equal: return VK_COMPARE_OP_NOT_EQUAL; - case rsx::comparison_function::always: return VK_COMPARE_OP_ALWAYS; - default: - fmt::throw_exception("Unknown compare op: 0x%x" HERE, static_cast(op)); - } - } + VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false); std::pair get_compatible_surface_format(rsx::surface_color_format color_format) { @@ -194,7 +175,6 @@ namespace vk } } - VkStencilOp get_stencil_op(rsx::stencil_op op) { switch (op) @@ -234,22 +214,6 @@ namespace vk fmt::throw_exception("Unknown cull face value: 0x%x" HERE, static_cast(cfv)); } } - - VkImageViewType get_view_type(rsx::texture_dimension_extended type) - { - switch (type) - { - case rsx::texture_dimension_extended::texture_dimension_1d: - return VK_IMAGE_VIEW_TYPE_1D; - case rsx::texture_dimension_extended::texture_dimension_2d: - return VK_IMAGE_VIEW_TYPE_2D; - case rsx::texture_dimension_extended::texture_dimension_cubemap: - return VK_IMAGE_VIEW_TYPE_CUBE; - case rsx::texture_dimension_extended::texture_dimension_3d: - return VK_IMAGE_VIEW_TYPE_3D; - default: ASSUME(0); - }; - } } namespace @@ -986,892 +950,6 @@ VkDescriptorSet VKGSRender::allocate_descriptor_set() return new_descriptor_set; } -void VKGSRender::begin() -{ - rsx::thread::begin(); - - if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering()) - return; - - init_buffers(rsx::framebuffer_creation_context::context_draw); -} - -void VKGSRender::update_draw_state() -{ - m_profiler.start(); - - float actual_line_width = rsx::method_registers.line_width(); - vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width); - - if (rsx::method_registers.poly_offset_fill_enabled()) - { - //offset_bias is the constant factor, multiplied by the implementation factor R - //offst_scale is the slope factor, multiplied by the triangle slope factor M - vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale()); - } - else - { - //Zero bias value - disables depth bias - vkCmdSetDepthBias(*m_current_command_buffer, 0.f, 0.f, 0.f); - } - - //Update dynamic state - if (rsx::method_registers.blend_enabled()) - { - //Update blend constants - auto blend_colors = rsx::get_constant_blend_colors(); - vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data()); - } - - if (rsx::method_registers.stencil_test_enabled()) - { - const bool two_sided_stencil = rsx::method_registers.two_sided_stencil_test_enabled(); - VkStencilFaceFlags face_flag = (two_sided_stencil) ? 
VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK; - - vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask()); - vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask()); - vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref()); - - if (two_sided_stencil) - { - vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask()); - vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask()); - vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref()); - } - } - - if (m_device->get_depth_bounds_support()) - { - if (rsx::method_registers.depth_bounds_test_enabled()) - { - //Update depth bounds min/max - vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); - } - else - { - vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f); - } - } - - bind_viewport(); - - //TODO: Set up other render-state parameters into the program pipeline - - m_frame_stats.setup_time += m_profiler.duration(); -} - -void VKGSRender::begin_render_pass() -{ - vk::begin_renderpass( - *m_current_command_buffer, - get_render_pass(), - m_draw_fbo->value, - { positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} }); -} - -void VKGSRender::close_render_pass() -{ - vk::end_renderpass(*m_current_command_buffer); -} - -VkRenderPass VKGSRender::get_render_pass() -{ - if (!m_cached_renderpass) - { - m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key); - } - - return m_cached_renderpass; -} - -void VKGSRender::emit_geometry(u32 sub_index) -{ - auto &draw_call = rsx::method_registers.current_draw_clause; - m_profiler.start(); - - if (sub_index == 0) - { - analyse_inputs_interleaved(m_vertex_layout); - - if (!m_vertex_layout.validate()) - { - // No vertex inputs enabled - // Execute remainining pipeline barriers with NOP draw - do - { - draw_call.execute_pipeline_dependencies(); - } - while (draw_call.next()); - - draw_call.end(); - return; - } - } - else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed) - { - // Rebase vertex bases instead of - for (auto &info : m_vertex_layout.interleaved_blocks) - { - const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); - info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE); - } - } - - const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; - - // Programs data is dependent on vertex state - auto upload_info = upload_vertex_data(); - if (!upload_info.vertex_draw_count) - { - // Malformed vertex setup; abort - return; - } - - m_frame_stats.vertex_upload_time += m_profiler.duration(); - - auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - auto volatile_buffer = m_volatile_attribute_storage ? 
m_volatile_attribute_storage->value : null_buffer_view->value; - bool update_descriptors = false; - - const auto& binding_table = m_device->get_pipeline_binding_table(); - - if (sub_index == 0) - { - update_descriptors = true; - - // Allocate stream layout memory for this batch - m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128; - m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range); - - if (vk::test_status_interrupt(vk::heap_changed)) - { - if (m_vertex_layout_storage && - m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value) - { - m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage)); - } - - vk::clear_status_interrupt(vk::heap_changed); - } - } - else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) - { - // Need to update descriptors; make a copy for the next draw - VkDescriptorSet new_descriptor_set = allocate_descriptor_set(); - std::vector copy_set(binding_table.total_descriptor_bindings); - - for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) - { - copy_set[n] = - { - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType - nullptr, // pNext - m_current_frame->descriptor_set, // srcSet - n, // srcBinding - 0u, // srcArrayElement - new_descriptor_set, // dstSet - n, // dstBinding - 0u, // dstArrayElement - 1u // descriptorCount - }; - } - - vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data()); - m_current_frame->descriptor_set = new_descriptor_set; - - update_descriptors = true; - } - - // Update vertex fetch parameters - update_vertex_env(sub_index, upload_info); - - verify(HERE), m_vertex_layout_storage; - if (update_descriptors) - { - m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); - } - - if (!m_current_subdraw_id++) - { - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - update_draw_state(); - begin_render_pass(); - - if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support()) - { - // It is inconvenient that conditional rendering breaks other things like compute dispatch - // TODO: If this is heavy, add refactor the resources into global and add checks around compute dispatch - VkConditionalRenderingBeginInfoEXT info{}; - info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; - info.buffer = m_cond_render_buffer->value; - - m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info); - m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render; - } - } - - // Bind the new set of descriptors for use with this draw call - vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); - - m_frame_stats.setup_time += m_profiler.duration(); - - if (!upload_info.index_info) - { - if (draw_call.is_single_draw()) - { - vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0); - } - else - { - u32 vertex_offset = 0; - const auto subranges = 
draw_call.get_subranges(); - for (const auto &range : subranges) - { - vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0); - vertex_offset += range.count; - } - } - } - else - { - const VkIndexType index_type = std::get<1>(*upload_info.index_info); - const VkDeviceSize offset = std::get<0>(*upload_info.index_info); - - vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); - - if (rsx::method_registers.current_draw_clause.is_single_draw()) - { - const u32 index_count = upload_info.vertex_draw_count; - vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); - } - else - { - u32 vertex_offset = 0; - const auto subranges = draw_call.get_subranges(); - for (const auto &range : subranges) - { - const auto count = get_index_count(draw_call.primitive, range.count); - vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0); - vertex_offset += count; - } - } - } - - m_frame_stats.draw_exec_time += m_profiler.duration(); -} - -void VKGSRender::end() -{ - if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering()) - { - execute_nop_draw(); - rsx::thread::end(); - return; - } - - // Check for frame resource status here because it is possible for an async flip to happen between begin/end - if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]] - { - check_present_status(); - - if (m_current_frame->swap_command_buffer) [[unlikely]] - { - // Borrow time by using the auxilliary context - m_aux_frame_context.grab_resources(*m_current_frame); - m_current_frame = &m_aux_frame_context; - } - else if (m_current_frame->used_descriptors) - { - m_current_frame->descriptor_pool.reset(0); - m_current_frame->used_descriptors = 0; - } - - verify(HERE), !m_current_frame->swap_command_buffer; - - m_current_frame->flags &= ~frame_context_state::dirty; - } - - m_profiler.start(); - - // Check for data casts - // NOTE: This is deprecated and will be removed soon. 
The memory barrier invoked before rendering does this better - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && ds->old_contents.size() == 1 && - ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM) - { - auto key = vk::get_renderpass_key(ds->info.format); - auto render_pass = vk::get_renderpass(*m_device, key); - verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE; - - VkClearDepthStencilValue clear = { 1.f, 0xFF }; - VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 }; - - // Initialize source - auto src = vk::as_rtt(ds->old_contents[0].source); - src->read_barrier(*m_current_command_buffer); - - switch (src->current_layout) - { - case VK_IMAGE_LAYOUT_GENERAL: - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - //case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - default: - src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - - // Clear explicitly before starting the inheritance transfer - const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL); - if (!preinitialized) ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); - if (!preinitialized) ds->pop_layout(*m_current_command_buffer); - - // TODO: Stencil transfer - ds->old_contents[0].init_transfer(ds); - m_depth_converter->run(*m_current_command_buffer, - ds->old_contents[0].src_rect(), - ds->old_contents[0].dst_rect(), - src->get_view(0xAAE4, rsx::default_remap_vector), - ds, render_pass); - - // TODO: Flush management to avoid pass running out of ubo space (very unlikely) - ds->on_write(); - } - - //Load textures - { - std::lock_guard lock(m_sampler_mutex); - bool update_framebuffer_sourced = false; - bool check_for_cyclic_refs = false; - - if (surface_store_tag != m_rtts.cache_tag) [[unlikely]] - { - update_framebuffer_sourced = true; - surface_store_tag = m_rtts.cache_tag; - } - - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) - { - if (!fs_sampler_state[i]) - fs_sampler_state[i] = std::make_unique(); - - if (m_samplers_dirty || m_textures_dirty[i] || - (update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)) - { - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled()) - { - check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); - *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); - - if (sampler_state->is_cyclic_reference) - { - check_for_cyclic_refs |= true; - } - - bool replace = !fs_sampler_handles[i]; - VkFilter mag_filter; - vk::minification_filter min_filter; - f32 min_lod = 0.f, max_lod = 0.f; - f32 lod_bias = 0.f; - - const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - VkBool32 compare_enabled = VK_FALSE; - VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER; - - if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT) - { - if (m_device->get_formats_support().d24_unorm_s8) - { - // NOTE: - // The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3 - // In case of projected shadow lookup the result 
of the divide operation has its Z clamped to [0-1] before comparison - // Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage - // Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results - - // NOTE2: - // To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available - - compare_enabled = VK_TRUE; - depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true); - } - } - - const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0; - const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); - const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()); - const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()); - const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()); - const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()); - - // Check if non-point filtering can even be used on this format - bool can_sample_linear; - if (sampler_state->format_class == rsx::format_type::color) [[likely]] - { - // Most PS3-like formats can be linearly filtered without problem - can_sample_linear = true; - } - else - { - // Not all GPUs support linear filtering of depth formats - const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() : - vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format); - - can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - } - - const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count(); - min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter()); - - if (can_sample_linear) - { - mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); - } - else - { - mag_filter = VK_FILTER_NEAREST; - min_filter.filter = VK_FILTER_NEAREST; - } - - if (min_filter.sample_mipmaps && mipmap_count > 1) - { - f32 actual_mipmaps; - if (sampler_state->upload_context == rsx::texture_upload_context::shader_read) - { - actual_mipmaps = static_cast(mipmap_count); - } - else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather) - { - // Clamp min and max lod - actual_mipmaps = static_cast(sampler_state->external_subresource_desc.sections_to_copy.size()); - } - else - { - actual_mipmaps = 1.f; - } - - if (actual_mipmaps > 1.f) - { - min_lod = rsx::method_registers.fragment_textures[i].min_lod(); - max_lod = rsx::method_registers.fragment_textures[i].max_lod(); - lod_bias = rsx::method_registers.fragment_textures[i].bias(); - - min_lod = std::min(min_lod, actual_mipmaps - 1.f); - max_lod = std::min(max_lod, actual_mipmaps - 1.f); - - if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST) - { - // Round to nearest 0.5 to work around some broken games - // Unlike openGL, sampler parameters cannot be dynamically changed on vulkan, leading to many permutations - lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f; - } - } - else - { - min_lod = max_lod = lod_bias = 0.f; - 
min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - } - } - - if (fs_sampler_handles[i] && m_textures_dirty[i]) - { - if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, - min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode)) - { - replace = true; - } - } - - if (replace) - { - fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, - min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode); - } - } - else - { - *sampler_state = {}; - } - - m_textures_dirty[i] = false; - } - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) - { - if (!vs_sampler_state[i]) - vs_sampler_state[i] = std::make_unique(); - - if (m_samplers_dirty || m_vertex_textures_dirty[i] || - (update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)) - { - auto sampler_state = static_cast(vs_sampler_state[i].get()); - - if (rsx::method_registers.vertex_textures[i].enabled()) - { - check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); - *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); - - if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache) - { - check_for_cyclic_refs |= true; - } - - bool replace = !vs_sampler_handles[i]; - const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN); - const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod(); - const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod(); - const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()); - - if (vs_sampler_handles[i]) - { - if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color)) - { - replace = true; - } - } - - if (replace) - { - vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler( - *m_device, - VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - unnormalized_coords, - 0.f, 1.f, min_lod, max_lod, - VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color); - } - } - else - *sampler_state = {}; - - m_vertex_textures_dirty[i] = false; - } - } - - m_samplers_dirty.store(false); - - if (check_for_cyclic_refs) - { - // Regenerate renderpass key - if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key); - key != m_current_renderpass_key) - { - m_current_renderpass_key = key; - m_cached_renderpass = VK_NULL_HANDLE; - } - } - } - - m_frame_stats.textures_upload_time += m_profiler.duration(); - - if (!load_program()) - { - // Program is not ready, skip drawing this - std::this_thread::yield(); - execute_nop_draw(); - // m_rtts.on_write(); - breaks games for obvious reasons - rsx::thread::end(); - return; - } - - // Allocate descriptor set - check_descriptors(); - m_current_frame->descriptor_set = allocate_descriptor_set(); - - // Load program execution environment - load_program_env(); - - m_frame_stats.setup_time += m_profiler.duration(); - - for (int i = 0; i < 
rsx::limits::fragment_textures_count; ++i) - { - if (current_fp_metadata.referenced_textures_mask & (1 << i)) - { - vk::image_view* view = nullptr; - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled() && - sampler_state->validate()) - { - if (view = sampler_state->image_handle; !view) - { - //Requires update, copy subresource - view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); - } - else - { - switch (auto raw = view->image(); raw->current_layout) - { - default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_GENERAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - if (!sampler_state->is_cyclic_reference) - { - // This was used in a cyclic ref before, but is missing a barrier - // No need for a full stall, use a custom barrier instead - VkPipelineStageFlags src_stage; - VkAccessFlags src_access; - if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) - { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); - - raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - } - } - - if (view) [[likely]] - { - m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); - - if (current_fragment_program.redirected_textures & (1 << i)) - { - // Stencil mirror required - auto root_image = static_cast(view->image()); - auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); - - if (!m_stencil_mirror_sampler) - { - m_stencil_mirror_sampler = std::make_unique(*m_device, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_FALSE, 0.f, 1.f, 0.f, 0.f, - VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_BORDER_COLOR_INT_OPAQUE_BLACK); - } - - m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - i, - 
::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); - } - } - else - { - const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); - - if (current_fragment_program.redirected_textures & (1 << i)) - { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); - } - } - } - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) - { - if (current_vp_metadata.referenced_textures_mask & (1 << i)) - { - if (!rsx::method_registers.vertex_textures[i].enabled()) - { - const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); - - continue; - } - - auto sampler_state = static_cast(vs_sampler_state[i].get()); - auto image_ptr = sampler_state->image_handle; - - if (!image_ptr && sampler_state->validate()) - { - image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); - m_vertex_textures_dirty[i] = true; - } - - if (!image_ptr) - { - rsx_log.error("Texture upload failed to vtexture index %d. 
Binding null sampler.", i); - const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); - - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); - - continue; - } - - switch (auto raw = image_ptr->image(); raw->current_layout) - { - default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - case VK_IMAGE_LAYOUT_GENERAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - if (!sampler_state->is_cyclic_reference) - { - // Custom barrier, see similar block in FS stage - VkPipelineStageFlags src_stage; - VkAccessFlags src_access; - if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) - { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); - - raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; - raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - - m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); - } - } - - m_texture_cache.release_uncached_temporary_subresources(); - - m_frame_stats.textures_upload_time += m_profiler.duration(); - - if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task) - { - u32 occlusion_id = m_occlusion_query_pool.find_free_slot(); - if (occlusion_id == UINT32_MAX) - { - // Force flush - rsx_log.error("[Performance Warning] Out of free occlusion slots. 
Forcing hard sync."); - ZCULL_control::sync(this); - - occlusion_id = m_occlusion_query_pool.find_free_slot(); - if (occlusion_id == UINT32_MAX) - { - //rsx_log.error("Occlusion pool overflow"); - if (m_current_task) m_current_task->result = 1; - } - } - - // Begin query - m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); - - auto &data = m_occlusion_map[m_active_query_info->driver_handle]; - data.indices.push_back(occlusion_id); - data.set_sync_command_buffer(m_current_command_buffer); - - m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; - m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query); - } - - bool primitive_emulated = false; - vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); - - // Apply write memory barriers - if (ds) ds->write_barrier(*m_current_command_buffer); - - for (auto &rtt : m_rtts.m_bound_render_targets) - { - if (auto surface = std::get<1>(rtt)) - { - surface->write_barrier(*m_current_command_buffer); - } - } - - // Final heap check... - check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); - - u32 sub_index = 0; - m_current_subdraw_id = 0; - - rsx::method_registers.current_draw_clause.begin(); - do - { - emit_geometry(sub_index++); - } - while (rsx::method_registers.current_draw_clause.next()); - - if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) - { - m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer); - m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render); - } - - m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); - - rsx::thread::end(); -} - void VKGSRender::set_viewport() { const auto clip_width = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_width(), true); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 1b9a3eec4f..2db9ec9b7a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -510,6 +510,9 @@ private: void load_program_env(); void update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info); + void load_texture_env(); + void bind_texture_env(); + public: void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); void set_viewport(); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp new file mode 100644 index 0000000000..d2e86656e2 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -0,0 +1,7 @@ +#include "stdafx.h" +#include "VKShaderInterpreter.h" + +namespace vk +{ + +}; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h new file mode 100644 index 0000000000..c820cddec9 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -0,0 +1,9 @@ +#pragma once +#include "VKGSRender.h" + +namespace vk +{ + class shader_interpreter : glsl::program + { + }; +} diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 3964e08af3..4b07e65067 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -83,18 +83,21 @@ + + + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index a95348e504..aacfb71395 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -1,6 +1,7 @@  + @@ -10,6 +11,7 @@ + @@ -24,6 +26,7 @@ + diff --git 
diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj
index a1e344a216..02d207ff1c 100644
--- a/rpcs3/VKGSRender.vcxproj
+++ b/rpcs3/VKGSRender.vcxproj
@@ -38,6 +38,7 @@
+
@@ -47,6 +48,7 @@
+
@@ -57,6 +59,7 @@
+
diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters
index 25b20b2211..91db33afcc 100644
--- a/rpcs3/VKGSRender.vcxproj.filters
+++ b/rpcs3/VKGSRender.vcxproj.filters
@@ -3,6 +3,7 @@
+
@@ -13,6 +14,7 @@
+
@@ -34,6 +36,7 @@
+
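The end() body moved out above walks the RSX draw clause with begin()/next(), emitting one sub-draw per index range. A standalone sketch of that iteration contract (draw_clause here is a hypothetical reduction of rsx::method_registers.current_draw_clause; only the begin/do/next shape comes from the diff):

#include <cstdio>
#include <vector>

struct draw_range { unsigned first, count; };

struct draw_clause
{
	std::vector<draw_range> ranges;
	std::size_t current = 0;

	void begin() { current = 0; }                      // rewind to the first range
	bool next() { return ++current < ranges.size(); }  // advance; false when exhausted
	const draw_range& get() const { return ranges[current]; }
};

int main()
{
	draw_clause clause{ { {0, 3}, {10, 6}, {32, 4} } };
	unsigned sub_index = 0;

	// Mirrors: clause.begin(); do { emit_geometry(sub_index++); } while (clause.next());
	clause.begin();
	do
	{
		const auto& r = clause.get();
		std::printf("sub-draw %u: first=%u count=%u\n", sub_index++, r.first, r.count);
	}
	while (clause.next());
}

The do/while form matters: a clause always contains at least one range, so the first sub-draw is emitted unconditionally before next() is consulted.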