From 3ddfa288cfd4f1e42998e81be0fbf623e861a1f7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 27 Oct 2020 23:41:20 +0300 Subject: [PATCH] rsx: Use multithreaded shader compiler backend --- rpcs3/Emu/CMakeLists.txt | 2 + rpcs3/Emu/RSX/CgBinaryProgram.h | 2 +- .../RSX/Common/FragmentProgramDecompiler.cpp | 4 +- rpcs3/Emu/RSX/Common/ProgramStateCache.cpp | 6 +- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 237 +++++--------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 55 ++-- rpcs3/Emu/RSX/GL/GLGSRender.h | 5 - rpcs3/Emu/RSX/GL/GLHelpers.cpp | 12 + rpcs3/Emu/RSX/GL/GLHelpers.h | 71 ++--- rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp | 149 +++++++++ rpcs3/Emu/RSX/GL/GLPipelineCompiler.h | 72 +++++ rpcs3/Emu/RSX/GL/GLProgramBuffer.h | 126 +++++--- rpcs3/Emu/RSX/RSXFragmentProgram.h | 84 ++++- rpcs3/Emu/RSX/RSXThread.cpp | 41 +-- rpcs3/Emu/RSX/RSXThread.h | 4 - rpcs3/Emu/RSX/VK/VKCompute.h | 7 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 20 +- rpcs3/Emu/RSX/VK/VKGSRender.h | 6 +- rpcs3/Emu/RSX/VK/VKOverlays.h | 7 +- rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp | 231 ++++++++++++++ rpcs3/Emu/RSX/VK/VKPipelineCompiler.h | 183 +++++++++++ rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 290 ++++++------------ rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 6 +- rpcs3/Emu/RSX/VK/VKTextOut.h | 8 +- rpcs3/Emu/RSX/rsx_cache.h | 4 +- rpcs3/Emu/system_config.h | 1 + rpcs3/GLGSRender.vcxproj | 4 +- rpcs3/GLGSRender.vcxproj.filters | 4 +- rpcs3/VKGSRender.vcxproj | 2 + rpcs3/VKGSRender.vcxproj.filters | 2 + 30 files changed, 1065 insertions(+), 580 deletions(-) create mode 100644 rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp create mode 100644 rpcs3/Emu/RSX/GL/GLPipelineCompiler.h create mode 100644 rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKPipelineCompiler.h diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 6926da20bb..ac68081f2a 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -422,6 +422,7 @@ target_sources(rpcs3_emu PRIVATE RSX/GL/GLFragmentProgram.cpp 
RSX/GL/GLGSRender.cpp RSX/GL/GLHelpers.cpp + RSX/GL/GLPipelineCompiler.cpp RSX/GL/GLPresent.cpp RSX/GL/GLRenderTargets.cpp RSX/GL/GLShaderInterpreter.cpp @@ -443,6 +444,7 @@ if(TARGET 3rdparty_vulkan) RSX/VK/VKGSRender.cpp RSX/VK/VKHelpers.cpp RSX/VK/VKMemAlloc.cpp + RSX/VK/VKPipelineCompiler.cpp RSX/VK/VKPresent.cpp RSX/VK/VKProgramPipeline.cpp RSX/VK/VKQueryPool.cpp diff --git a/rpcs3/Emu/RSX/CgBinaryProgram.h b/rpcs3/Emu/RSX/CgBinaryProgram.h index 754b06252e..2271d32df4 100644 --- a/rpcs3/Emu/RSX/CgBinaryProgram.h +++ b/rpcs3/Emu/RSX/CgBinaryProgram.h @@ -310,7 +310,7 @@ public: u32 ctrl = (vmfprog.outputFromH0 ? 0 : 0x40) | (vmfprog.depthReplace ? 0xe : 0); std::vector td; RSXFragmentProgram prog; - prog.ucode_length = 0, prog.addr = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl; + prog.ucode_length = 0, prog.data = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl; GLFragmentDecompilerThread(m_glsl_shader, param_array, prog, size).Task(); vm::close(); } diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 7ec7e635d9..9350767ea7 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -217,7 +217,7 @@ std::string FragmentProgramDecompiler::AddConst() return name; } - auto data = reinterpret_cast*>(static_cast(m_prog.addr) + m_size + 4 * sizeof(u32)); + auto data = reinterpret_cast*>(static_cast(m_prog.get_data()) + m_size + 4 * sizeof(u32)); m_offset = 2 * 4 * sizeof(u32); u32 x = GetData(data[0]); u32 y = GetData(data[1]); @@ -1118,7 +1118,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode) std::string FragmentProgramDecompiler::Decompile() { - auto data = static_cast*>(m_prog.addr); + auto data = static_cast*>(m_prog.get_data()); m_size = 0; m_location = 0; m_loop_count = 0; diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 
6bc0a64ada..23604abbbe 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -423,7 +423,7 @@ size_t fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragment { // 64-bit Fowler/Noll/Vo FNV-1a hash code size_t hash = 0xCBF29CE484222325ULL; - const void* instbuffer = program.addr; + const void* instbuffer = program.get_data(); size_t instIndex = 0; while (true) { @@ -475,8 +475,8 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con return false; } - const void* instBuffer1 = binary1.addr; - const void* instBuffer2 = binary2.addr; + const void* instBuffer1 = binary1.get_data(); + const void* instBuffer2 = binary2.get_data(); size_t instIndex = 0; while (true) { diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 38e9f86042..5b646b1b7d 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -99,6 +99,7 @@ template class program_state_cache { using pipeline_storage_type = typename backend_traits::pipeline_storage_type; + using pipeline_type = typename backend_traits::pipeline_type; using pipeline_properties = typename backend_traits::pipeline_properties; using vertex_program_type = typename backend_traits::vertex_program_type; using fragment_program_type = typename backend_traits::fragment_program_type; @@ -133,23 +134,6 @@ class program_state_cache } }; - struct async_decompiler_job - { - RSXVertexProgram vertex_program; - RSXFragmentProgram fragment_program; - pipeline_properties properties; - - std::vector local_storage; - - async_decompiler_job(RSXVertexProgram v, const RSXFragmentProgram f, pipeline_properties p) : - vertex_program(std::move(v)), fragment_program(f), properties(std::move(p)) - { - local_storage.resize(fragment_program.ucode_length); - std::memcpy(local_storage.data(), fragment_program.addr, fragment_program.ucode_length); - fragment_program.addr = 
local_storage.data(); - } - }; - protected: using decompiler_callback_t = std::function; @@ -165,8 +149,6 @@ protected: binary_to_fragment_program m_fragment_shader_cache; std::unordered_map m_storage; - std::deque m_decompile_queue; - std::unordered_map m_decompiler_map; decompiler_callback_t notify_pipeline_compiled; vertex_program_type __null_vertex_program; @@ -213,7 +195,6 @@ protected: { bool recompile = false; fragment_program_type* new_shader; - void* fragment_program_ucode_copy; { reader_lock lock(m_fragment_mutex); @@ -229,27 +210,17 @@ protected: } rsx_log.notice("FP not found in buffer!"); - fragment_program_ucode_copy = malloc(rsx_fp.ucode_length); - - verify("malloc() failed!" HERE), fragment_program_ucode_copy; - std::memcpy(fragment_program_ucode_copy, rsx_fp.addr, rsx_fp.ucode_length); - - RSXFragmentProgram new_fp_key = rsx_fp; - new_fp_key.addr = fragment_program_ucode_copy; lock.upgrade(); - auto [it, inserted] = m_fragment_shader_cache.try_emplace(new_fp_key); + auto [it, inserted] = m_fragment_shader_cache.try_emplace(rsx_fp); new_shader = &(it->second); recompile = inserted; - } - if (recompile) - { - backend_traits::recompile_fragment_program(rsx_fp, *new_shader, m_next_id++); - } - else - { - free(fragment_program_ucode_copy); + if (inserted) + { + it->first.clone_data(); + backend_traits::recompile_fragment_program(rsx_fp, *new_shader, m_next_id++); + } } return std::forward_as_tuple(*new_shader, false); @@ -330,94 +301,18 @@ public: ~program_state_cache() {} - // Returns 2 booleans. 
- // First flag hints that there is more work to do (busy hint) - // Second flag is true if at least one program has been linked successfully (sync hint) template - std::pair async_update(u32 max_decompile_count, Args&& ...args) - { - // Decompile shaders and link one pipeline object per 'run' - // NOTE: Linking is much slower than decompilation step, so always decompile at least 1 unit - // TODO: Use try_lock instead - bool busy = false; - bool sync = false; - u32 count = 0; - - while (true) - { - { - reader_lock lock(m_decompiler_mutex); - if (m_decompile_queue.empty()) - { - break; - } - } - - // Decompile - const auto& vp_search = search_vertex_program(m_decompile_queue.front().vertex_program, true); - const auto& fp_search = search_fragment_program(m_decompile_queue.front().fragment_program, true); - - const bool already_existing_fragment_program = std::get<1>(fp_search); - const bool already_existing_vertex_program = std::get<1>(vp_search); - const vertex_program_type& vertex_program = std::get<0>(vp_search); - const fragment_program_type& fragment_program = std::get<0>(fp_search); - const pipeline_key key = { vertex_program.id, fragment_program.id, m_decompile_queue.front().properties }; - - // Retest - bool found = false; - if (already_existing_vertex_program && already_existing_fragment_program) - { - if (auto I = m_storage.find(key); I != m_storage.end()) - { - found = true; - } - } - - if (!found) - { - pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, m_decompile_queue.front().properties, std::forward(args)...); - rsx_log.success("New program compiled successfully"); - sync = true; - - if (notify_pipeline_compiled) - { - notify_pipeline_compiled(m_decompile_queue.front().properties, m_decompile_queue.front().vertex_program, m_decompile_queue.front().fragment_program); - } - - std::scoped_lock lock(m_pipeline_mutex); - m_storage[key] = std::move(pipeline); - } - - { - std::scoped_lock 
lock(m_decompiler_mutex); - m_decompile_queue.pop_front(); - m_decompiler_map.erase(key); - } - - if (++count >= max_decompile_count) - { - // Allows configurable decompiler 'load' - // Smaller unit count will release locks faster - busy = true; - break; - } - } - - return { busy, sync }; - } - - template - pipeline_storage_type& get_graphics_pipeline( + pipeline_type* get_graphics_pipeline( const RSXVertexProgram& vertexShader, const RSXFragmentProgram& fragmentShader, pipeline_properties& pipelineProperties, - bool allow_async, + bool compile_async, bool allow_notification, Args&& ...args ) { - const auto &vp_search = search_vertex_program(vertexShader, !allow_async); - const auto &fp_search = search_fragment_program(fragmentShader, !allow_async); + const auto &vp_search = search_vertex_program(vertexShader); + const auto &fp_search = search_fragment_program(fragmentShader); const bool already_existing_fragment_program = std::get<1>(fp_search); const bool already_existing_vertex_program = std::get<1>(vp_search); @@ -427,62 +322,79 @@ public: m_cache_miss_flag = true; - if (!allow_async || (already_existing_vertex_program && already_existing_fragment_program)) + if (already_existing_vertex_program && already_existing_fragment_program) { + // There is a high chance the pipeline object was compiled if the two shaders already existed before backend_traits::validate_pipeline_properties(vertex_program, fragment_program, pipelineProperties); + reader_lock lock(m_pipeline_mutex); + if (const auto I = m_storage.find(key); I != m_storage.end()) { - reader_lock lock(m_pipeline_mutex); - if (const auto I = m_storage.find(key); I != m_storage.end()) - { - m_cache_miss_flag = false; - return I->second; - } + m_cache_miss_flag = (I->second == __null_pipeline_handle); + return I->second.get(); + } + } + + { + std::lock_guard lock(m_pipeline_mutex); + + // Check if another submission completed in the mean time + if (const auto I = m_storage.find(key); I != m_storage.end()) + { + 
m_cache_miss_flag = (I->second == __null_pipeline_handle); + return I->second.get(); } - if (!allow_async) - { - rsx_log.notice("Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id); - pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward(args)...); + // Insert a placeholder if the key still doesn't exist to avoid re-linking of the same pipeline + m_storage[key] = std::move(__null_pipeline_handle); + } - if (allow_notification && notify_pipeline_compiled) + rsx_log.notice("Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id); + + std::function callback; + + if (allow_notification) + { + callback = [this, vertexShader, fragmentShader_ = RSXFragmentProgram::clone(fragmentShader), key] + (pipeline_storage_type& pipeline) -> pipeline_type* + { + if (!pipeline) { - notify_pipeline_compiled(pipelineProperties, vertexShader, fragmentShader); - rsx_log.success("New program compiled successfully"); + return nullptr; + } + + rsx_log.success("Program compiled successfully"); + notify_pipeline_compiled(key.properties, vertexShader, fragmentShader_); + + std::lock_guard lock(m_pipeline_mutex); + auto& pipe_result = m_storage[key]; + pipe_result = std::move(pipeline); + return pipe_result.get(); + }; + } + else + { + callback = [this, key](pipeline_storage_type& pipeline) -> pipeline_type* + { + if (!pipeline) + { + return nullptr; } std::lock_guard lock(m_pipeline_mutex); - auto &rtn = m_storage[key] = std::move(pipeline); - return rtn; - } + auto& pipe_result = m_storage[key]; + pipe_result = std::move(pipeline); + return pipe_result.get(); + }; } - verify(HERE), allow_async; - - std::scoped_lock lock(m_decompiler_mutex, m_pipeline_mutex); - - // Rechecks - if (already_existing_vertex_program && already_existing_fragment_program) - { - if (const auto I = m_storage.find(key); I != m_storage.end()) - { - m_cache_miss_flag = false; - return I->second; - 
} - - if (const auto I = m_decompiler_map.find(key); I != m_decompiler_map.end()) - { - // Already in queue - return __null_pipeline_handle; - } - - m_decompiler_map[key] = true; - } - - // Enqueue if not already queued - m_decompile_queue.emplace_back(vertexShader, fragmentShader, pipelineProperties); - - return __null_pipeline_handle; + return backend_traits::build_pipeline( + vertex_program, // VS, must already be decompiled and recompiled above + fragment_program, // FS, must already be decompiled and recompiled above + pipelineProperties, // Pipeline state + compile_async, // Allow asynchronous compilation + callback, // Insertion and notification callback + std::forward(args)...); // Other arguments } void fill_fragment_constants_buffer(gsl::span dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize = false) const @@ -497,7 +409,7 @@ public: alignas(16) f32 tmp[4]; for (size_t offset_in_fragment_program : I->second.FragmentConstantOffsetCache) { - char* data = static_cast(fragment_program.addr) + offset_in_fragment_program; + char* data = static_cast(fragment_program.get_data()) + offset_in_fragment_program; const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data)); const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8)); @@ -546,11 +458,6 @@ public: { std::scoped_lock lock(m_vertex_mutex, m_fragment_mutex, m_decompiler_mutex, m_pipeline_mutex); - for (auto& pair : m_fragment_shader_cache) - { - free(pair.first.addr); - } - notify_pipeline_compiled = {}; m_fragment_shader_cache.clear(); m_vertex_shader_cache.clear(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index b5fbe91323..04a275f1bc 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -59,10 +59,35 @@ void GLGSRender::on_init_thread() m_context = m_frame->make_context(); const auto shadermode = g_cfg.video.shadermode.get(); - - if (shadermode == 
shader_mode::async_recompiler || shadermode == shader_mode::async_with_interpreter) + if (shadermode != shader_mode::recompiler) { - m_decompiler_context = m_frame->make_context(); + auto context_create_func = [m_frame = m_frame]() + { + return m_frame->make_context(); + }; + + auto context_bind_func = [m_frame = m_frame](draw_context_t ctx) + { + m_frame->set_current(ctx); + }; + + auto context_destroy_func = [m_frame = m_frame](draw_context_t ctx) + { + m_frame->delete_context(ctx); + }; + + int thread_count = g_cfg.video.shader_compiler_threads_count; + if (!thread_count) thread_count = -1; + gl::initialize_pipe_compiler(context_create_func, context_bind_func, context_destroy_func, thread_count); + } + else + { + auto null_context_create_func = []() -> draw_context_t + { + return nullptr; + }; + + gl::initialize_pipe_compiler(null_context_create_func, {}, {}, 1); } // Bind primary context to main RSX thread @@ -342,6 +367,8 @@ void GLGSRender::on_exit() gl::g_typeless_transfer_buffer.remove(); } + gl::destroy_pipe_compiler(); + m_prog_buffer.clear(); m_rtts.destroy(); @@ -653,7 +680,7 @@ bool GLGSRender::load_program() { void* pipeline_properties = nullptr; m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties, - shadermode != shader_mode::recompiler, true).get(); + shadermode != shader_mode::recompiler, true); if (m_prog_buffer.check_cache_missed()) { @@ -838,8 +865,7 @@ void GLGSRender::load_program_env() // Bind textures m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast(fp_buf + 16)); - const auto fp_data = static_cast(current_fragment_program.addr) + current_fp_metadata.program_start_offset; - std::memcpy(fp_buf + 80, fp_data, current_fp_metadata.program_ucode_length); + std::memcpy(fp_buf + 80, current_fragment_program.get_data(), current_fragment_program.ucode_length); 
m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length); m_fragment_instructions_buffer->notify(); @@ -1072,20 +1098,3 @@ void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que glEndQuery(GL_ANY_SAMPLES_PASSED); } } - -void GLGSRender::on_decompiler_init() -{ - // Bind decompiler context to this thread - m_frame->set_current(m_decompiler_context); -} - -void GLGSRender::on_decompiler_exit() -{ - // Cleanup - m_frame->delete_context(m_decompiler_context); -} - -bool GLGSRender::on_decompiler_task() -{ - return m_prog_buffer.async_update(8).first; -} diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index d7efa30333..a3ce62351d 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -121,7 +121,6 @@ private: std::list work_queue; GLProgramBuffer m_prog_buffer; - draw_context_t m_decompiler_context; //buffer gl::fbo* m_draw_fbo = nullptr; @@ -198,8 +197,4 @@ protected: std::array, 4> copy_render_targets_to_memory() override; std::array, 2> copy_depth_stencil_buffer_to_memory() override; - - void on_decompiler_init() override; - void on_decompiler_exit() override; - bool on_decompiler_task() override; }; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp index cc4721480c..12946e7a77 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp +++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp @@ -23,6 +23,18 @@ namespace gl return s_tls_primary_context_thread; } + void flush_command_queue(fence& fence_obj) + { + if (is_primary_context_thread()) + { + fence_obj.check_signaled(); + } + else + { + glFlush(); + } + } + GLenum draw_mode(rsx::primitive_type in) { switch (in) diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index d9416b80e5..4128d4b052 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -51,12 +51,15 @@ namespace gl else\ gl##func##EXT(texture_name, target, __VA_ARGS__); + class 
fence; + void enable_debugging(); bool is_primitive_native(rsx::primitive_type in); GLenum draw_mode(rsx::primitive_type in); void set_primary_context_thread(bool = true); bool is_primary_context_thread(); + void flush_command_queue(fence& fence_obj); // Texture helpers std::array apply_swizzle_remap(const std::array& swizzle_remap, const std::pair, std::array>& decoded_remap); @@ -76,8 +79,8 @@ namespace gl class fence { GLsync m_value = nullptr; - GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; - bool signaled = false; + mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; + mutable bool signaled = false; public: @@ -104,12 +107,12 @@ namespace gl create(); } - bool is_empty() + bool is_empty() const { return (m_value == nullptr); } - bool check_signaled() + bool check_signaled() const { verify(HERE), m_value != nullptr; @@ -2222,6 +2225,8 @@ public: ::glsl::program_domain type; GLuint m_id = GL_NONE; + fence m_compiled_fence; + public: shader() = default; @@ -2245,10 +2250,7 @@ public: { type = type_; source = src; - } - shader& compile() - { GLenum shader_type; switch (type) { @@ -2266,6 +2268,10 @@ public: } m_id = glCreateShader(shader_type); + } + + shader& compile() + { const char* str = source.c_str(); const GLint length = ::narrow(source.length()); @@ -2310,6 +2316,8 @@ public: rsx_log.fatal("Compilation failed: %s", error_msg); } + m_compiled_fence.create(); + flush_command_queue(m_compiled_fence); return *this; } @@ -2332,6 +2340,11 @@ public: return source; } + fence get_compile_fence_sync() const + { + return m_compiled_fence; + } + void set_id(uint id) { m_id = id; @@ -2348,6 +2361,19 @@ public: } }; + class shader_view : public shader + { + public: + shader_view(GLuint id) : shader(id) + { + } + + ~shader_view() + { + set_id(0); + } + }; + class program { GLuint m_id = GL_NONE; @@ -2548,11 +2574,7 @@ public: } m_fence.create(); - - if (!is_primary_context_thread()) - { - glFlush(); - } + flush_command_queue(m_fence); } } @@ -2637,18 +2659,6 @@ public: 
return glGetUniformLocation(m_id, name.c_str()); } - program& operator += (const shader& rhs) - { - return attach(rhs); - } - - program& operator += (std::initializer_list shaders) - { - for (auto &shader : shaders) - *this += shader; - return *this; - } - program() = default; program(const program&) = delete; program(program&& program_) @@ -2683,19 +2693,6 @@ public: } }; - class shader_view : public shader - { - public: - shader_view(GLuint id) : shader(id) - { - } - - ~shader_view() - { - set_id(0); - } - }; - class program_view : public program { public: diff --git a/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp new file mode 100644 index 0000000000..4f3a563945 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp @@ -0,0 +1,149 @@ +#include "stdafx.h" +#include "GLPipelineCompiler.h" +#include "Utilities/Thread.h" + +#include + +namespace gl +{ + // Global list of worker threads + std::unique_ptr> g_pipe_compilers; + int g_num_pipe_compilers = 0; + atomic_t g_compiler_index{}; + + pipe_compiler::pipe_compiler() + { + } + + pipe_compiler::~pipe_compiler() + { + if (m_context_destroy_func) + { + m_context_destroy_func(m_context); + } + } + + void pipe_compiler::initialize( + std::function context_create_func, + std::function context_bind_func, + std::function context_destroy_func) + { + m_context_bind_func = context_bind_func; + m_context_destroy_func = context_destroy_func; + + m_context = context_create_func(); + } + + void pipe_compiler::operator()() + { + while (thread_ctrl::state() != thread_state::aborting) + { + for (auto&& job : m_work_queue.pop_all()) + { + if (m_context_ready.compare_and_swap_test(false, true)) + { + // Bind context on first use + m_context_bind_func(m_context); + } + + auto result = int_compile_graphics_pipe( + job.vp_handle, job.fp_handle, + job.post_create_func, + job.post_link_func); + + job.completion_callback(result); + } + + m_work_queue.wait(); + } + } + + std::unique_ptr 
pipe_compiler::compile( + GLuint vp_handle, GLuint fp_handle, + op_flags flags, + callback_t post_create_func, + callback_t post_link_func, + callback_t completion_callback_func) + { + if (flags == COMPILE_INLINE) + { + return int_compile_graphics_pipe(vp_handle, fp_handle, post_create_func, post_link_func); + } + + m_work_queue.push(vp_handle, fp_handle, post_create_func, post_link_func, completion_callback_func); + return {}; + } + + std::unique_ptr pipe_compiler::int_compile_graphics_pipe( + GLuint vp_handle, GLuint fp_handle, + callback_t post_create_func, + callback_t post_link_func) + { + auto result = std::make_unique(); + result->create(); + + if (post_create_func) + { + post_create_func(result); + } + + result->link(); + + if (post_link_func) + { + post_link_func(result); + } + + return result; + } + + void initialize_pipe_compiler( + std::function context_create_func, + std::function context_bind_func, + std::function context_destroy_func, + int num_worker_threads) + { + if (num_worker_threads == -1) + { + // Select optimal number of compiler threads + const auto hw_threads = std::thread::hardware_concurrency(); + if (hw_threads >= 12) + { + num_worker_threads = 4; + } + else if (hw_threads >= 8) + { + num_worker_threads = 2; + } + else + { + num_worker_threads = 1; + } + } + + verify(HERE), num_worker_threads >= 1; + + // Create the thread pool + g_pipe_compilers = std::make_unique>("RSX.W", num_worker_threads); + g_num_pipe_compilers = num_worker_threads; + + // Initialize the workers. 
At least one inline compiler shall exist (doesn't actually run) + for (pipe_compiler& compiler : *g_pipe_compilers.get()) + { + compiler.initialize(context_create_func, context_bind_func, context_destroy_func); + } + } + + void destroy_pipe_compiler() + { + g_pipe_compilers.reset(); + } + + pipe_compiler* get_pipe_compiler() + { + verify(HERE), g_pipe_compilers; + int thread_index = g_compiler_index++; + + return g_pipe_compilers.get()->begin() + (thread_index % g_num_pipe_compilers); + } +} diff --git a/rpcs3/Emu/RSX/GL/GLPipelineCompiler.h b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.h new file mode 100644 index 0000000000..c8b7d1d597 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.h @@ -0,0 +1,72 @@ +#pragma once +#include "GLHelpers.h" +#include "Emu/RSX/display.h" +#include "Utilities/lockless.h" + +namespace gl +{ + class pipe_compiler + { + public: + enum op_flags + { + COMPILE_DEFAULT = 0, + COMPILE_INLINE = 1, + COMPILE_DEFERRED = 2 + }; + + using callback_t = std::function&)>; + + pipe_compiler(); + ~pipe_compiler(); + + void initialize( + std::function context_create_func, + std::function context_bind_func, + std::function context_destroy_func); + + std::unique_ptr compile( + GLuint vp_handle, GLuint fp_handle, + op_flags flags, + callback_t post_create_func = {}, + callback_t post_link_func = {}, + callback_t completion_callback = {}); + + void operator()(); + + private: + + struct pipe_compiler_job + { + GLuint vp_handle; + GLuint fp_handle; + callback_t post_create_func; + callback_t post_link_func; + callback_t completion_callback; + + pipe_compiler_job(GLuint vp, GLuint fp, callback_t post_create, callback_t post_link, callback_t completion) + : vp_handle(vp), fp_handle(fp), post_create_func(post_create), post_link_func(post_link), completion_callback(completion) + {} + }; + + lf_queue m_work_queue; + + draw_context_t m_context = 0; + atomic_t m_context_ready = false; + + std::function m_context_bind_func; + std::function m_context_destroy_func; + 
+ std::unique_ptr int_compile_graphics_pipe( + GLuint vp_handle, GLuint fp_handle, callback_t post_create_func, callback_t post_link_func); + }; + + void initialize_pipe_compiler( + std::function context_create_func, + std::function context_bind_func, + std::function contextdestroy_func, + int num_worker_threads = -1); + + void destroy_pipe_compiler(); + pipe_compiler* get_pipe_compiler(); +} diff --git a/rpcs3/Emu/RSX/GL/GLProgramBuffer.h b/rpcs3/Emu/RSX/GL/GLProgramBuffer.h index 0864cac8d7..ca3ab0ddcc 100644 --- a/rpcs3/Emu/RSX/GL/GLProgramBuffer.h +++ b/rpcs3/Emu/RSX/GL/GLProgramBuffer.h @@ -2,6 +2,7 @@ #include "GLVertexProgram.h" #include "GLFragmentProgram.h" #include "GLHelpers.h" +#include "GLPipelineCompiler.h" #include "../Common/ProgramStateCache.h" #include "../rsx_utils.h" @@ -9,6 +10,7 @@ struct GLTraits { using vertex_program_type = GLVertexProgram; using fragment_program_type = GLFragmentProgram; + using pipeline_type = gl::glsl::program; using pipeline_storage_type = std::unique_ptr; using pipeline_properties = void*; @@ -32,60 +34,86 @@ struct GLTraits } static - pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties&) + pipeline_type* build_pipeline( + const vertex_program_type &vertexProgramData, + const fragment_program_type &fragmentProgramData, + const pipeline_properties&, + bool compile_async, + std::function callback) { - pipeline_storage_type result = std::make_unique(); - result->create() - .attach(gl::glsl::shader_view(vertexProgramData.id)) - .attach(gl::glsl::shader_view(fragmentProgramData.id)) - .bind_fragment_data_location("ocol0", 0) - .bind_fragment_data_location("ocol1", 1) - .bind_fragment_data_location("ocol2", 2) - .bind_fragment_data_location("ocol3", 3) - .link([](gl::glsl::program* program) - { - // Program locations are guaranteed to not change after linking - // Texture locations are simply bound to the TIUs so this 
can be done once - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) - { - int location; - if (program->uniforms.has_location(rsx::constants::fragment_texture_names[i], &location)) - { - // Assign location to TIU - program->uniforms[location] = GL_FRAGMENT_TEXTURES_START + i; + auto compiler = gl::get_pipe_compiler(); + auto flags = (compile_async) ? gl::pipe_compiler::COMPILE_DEFERRED : gl::pipe_compiler::COMPILE_INLINE; - // Check for stencil mirror - const std::string mirror_name = std::string(rsx::constants::fragment_texture_names[i]) + "_stencil"; - if (program->uniforms.has_location(mirror_name, &location)) - { - // Assign mirror to TIU - program->uniforms[location] = GL_STENCIL_MIRRORS_START + i; - } - } - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) - { - int location; - if (program->uniforms.has_location(rsx::constants::vertex_texture_names[i], &location)) - program->uniforms[location] = GL_VERTEX_TEXTURES_START + i; - } - - // Bind locations 0 and 1 to the stream buffers - program->uniforms[0] = GL_STREAM_BUFFER_START + 0; - program->uniforms[1] = GL_STREAM_BUFFER_START + 1; - }); - - if (g_cfg.video.log_programs) + gl::fence vp_fence, fp_fence; + if (compile_async) { - rsx_log.notice("*** prog id = %d", result->id()); - rsx_log.notice("*** vp id = %d", vertexProgramData.id); - rsx_log.notice("*** fp id = %d", fragmentProgramData.id); - rsx_log.notice("*** vp shader = \n%s", vertexProgramData.shader.get_source().c_str()); - rsx_log.notice("*** fp shader = \n%s", fragmentProgramData.shader.get_source().c_str()); + vp_fence = vertexProgramData.shader.get_compile_fence_sync(); + fp_fence = fragmentProgramData.shader.get_compile_fence_sync(); } - return result; + auto post_create_func = [vp_id = vertexProgramData.id, fp_id = fragmentProgramData.id, vp_fence, fp_fence] + (std::unique_ptr& program) + { + if (!vp_fence.is_empty()) + { + // Force server threads to wait for the compilation to finish + vp_fence.server_wait_sync(); 
+ fp_fence.server_wait_sync(); + } + + program->attach(gl::glsl::shader_view(vp_id)) + .attach(gl::glsl::shader_view(fp_id)) + .bind_fragment_data_location("ocol0", 0) + .bind_fragment_data_location("ocol1", 1) + .bind_fragment_data_location("ocol2", 2) + .bind_fragment_data_location("ocol3", 3); + + if (g_cfg.video.log_programs) + { + rsx_log.notice("*** prog id = %d", program->id()); + rsx_log.notice("*** vp id = %d", vp_id); + rsx_log.notice("*** fp id = %d", fp_id); + } + }; + + auto post_link_func = [](std::unique_ptr& program) + { + // Program locations are guaranteed to not change after linking + // Texture locations are simply bound to the TIUs so this can be done once + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + { + int location; + if (program->uniforms.has_location(rsx::constants::fragment_texture_names[i], &location)) + { + // Assign location to TIU + program->uniforms[location] = GL_FRAGMENT_TEXTURES_START + i; + + // Check for stencil mirror + const std::string mirror_name = std::string(rsx::constants::fragment_texture_names[i]) + "_stencil"; + if (program->uniforms.has_location(mirror_name, &location)) + { + // Assign mirror to TIU + program->uniforms[location] = GL_STENCIL_MIRRORS_START + i; + } + } + } + + for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + { + int location; + if (program->uniforms.has_location(rsx::constants::vertex_texture_names[i], &location)) + program->uniforms[location] = GL_VERTEX_TEXTURES_START + i; + } + + // Bind locations 0 and 1 to the stream buffers + program->uniforms[0] = GL_STREAM_BUFFER_START + 0; + program->uniforms[1] = GL_STREAM_BUFFER_START + 1; + }; + + auto pipeline = compiler->compile(vertexProgramData.id, fragmentProgramData.id, + flags, post_create_func, post_link_func, callback); + + return callback(pipeline); } }; diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 6b033d76cf..5f86d2bf08 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h 
+++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -228,23 +228,61 @@ static const std::string rsx_fp_op_names[] = struct RSXFragmentProgram { - void *addr; - u32 offset; - u32 ucode_length; - u32 total_length; - u32 ctrl; - u16 unnormalized_coords; - u16 redirected_textures; - u16 shadow_textures; - bool two_sided_lighting; - u32 texture_dimensions; - u32 texcoord_control_mask; + struct data_storage_helper + { + void* data_ptr = nullptr; + std::vector local_storage; + + data_storage_helper() = default; + + data_storage_helper(void* ptr) + { + data_ptr = ptr; + local_storage.clear(); + } + + data_storage_helper(const data_storage_helper& other) + { + if (other.data_ptr == other.local_storage.data()) + { + local_storage = other.local_storage; + data_ptr = local_storage.data(); + } + else + { + data_ptr = other.data_ptr; + local_storage.clear(); + } + } + + void deep_copy(u32 max_length) + { + if (local_storage.empty() && data_ptr) + { + local_storage.resize(max_length); + std::memcpy(local_storage.data(), data_ptr, max_length); + data_ptr = local_storage.data(); + } + } + + } mutable data; + + u32 offset = 0; + u32 ucode_length = 0; + u32 total_length = 0; + u32 ctrl = 0; + u16 unnormalized_coords = 0; + u16 redirected_textures = 0; + u16 shadow_textures = 0; + bool two_sided_lighting = false; + u32 texture_dimensions = 0; + u32 texcoord_control_mask = 0; float texture_scale[16][4]; u8 textures_alpha_kill[16]; u8 textures_zfunc[16]; - bool valid; + bool valid = false; rsx::texture_dimension_extended get_texture_dimension(u8 id) const { @@ -264,6 +302,26 @@ struct RSXFragmentProgram RSXFragmentProgram() { - memset(this, 0, sizeof(RSXFragmentProgram)); + std::memset(texture_scale, 0, sizeof(float) * 16 * 4); + std::memset(textures_alpha_kill, 0, sizeof(u8) * 16); + std::memset(textures_zfunc, 0, sizeof(u8) * 16); + } + + static RSXFragmentProgram clone(const RSXFragmentProgram& prog) + { + auto result = prog; + result.clone_data(); + return result; + } + + void* get_data() 
const + { + return data.data_ptr; + } + + void clone_data() const + { + verify(HERE), ucode_length; + data.deep_copy(ucode_length); } }; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ee4e6b0321..790391ff83 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -561,41 +561,6 @@ namespace rsx } }); - g_fxo->init("RSX Decompiler Thread", [this] - { - const auto shadermode = g_cfg.video.shadermode.get(); - - if (shadermode != shader_mode::async_recompiler && shadermode != shader_mode::async_with_interpreter) - { - // Die - return; - } - - on_decompiler_init(); - - if (g_cfg.core.thread_scheduler_enabled) - { - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); - } - - while (!Emu.IsStopped() && !m_rsx_thread_exiting) - { - if (!on_decompiler_task()) - { - if (Emu.IsPaused()) - { - std::this_thread::sleep_for(1ms); - } - else - { - std::this_thread::sleep_for(500us); - } - } - } - - on_decompiler_exit(); - }); - // Raise priority above other threads thread_ctrl::set_native_priority(1); @@ -1779,10 +1744,10 @@ namespace rsx const auto [program_offset, program_location] = method_registers.shader_program_address(); - result.addr = vm::base(rsx::get_address(program_offset, program_location, HERE)); - current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); + result.data = vm::base(rsx::get_address(program_offset, program_location, HERE)); + current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.get_data()); - result.addr = (static_cast(result.addr) + current_fp_metadata.program_start_offset); + result.data = (static_cast(result.get_data()) + current_fp_metadata.program_start_offset); result.offset = program_offset + current_fp_metadata.program_start_offset; result.ucode_length = current_fp_metadata.program_ucode_length; result.total_length = result.ucode_length + 
current_fp_metadata.program_start_offset; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 4346b3f5eb..1f501e5975 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -826,10 +826,6 @@ namespace rsx */ virtual void do_local_task(FIFO_state state); - virtual void on_decompiler_init() {} - virtual void on_decompiler_exit() {} - virtual bool on_decompiler_task() { return false; } - virtual void emit_geometry(u32) {} void run_FIFO(); diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 8555d4e765..2a54015bc6 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -1,5 +1,6 @@ #pragma once #include "VKHelpers.h" +#include "VKPipelineCompiler.h" #include "VKRenderPass.h" #include "Utilities/StrUtil.h" #include "Emu/IdManager.h" @@ -177,10 +178,8 @@ namespace vk info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; - VkPipeline pipeline; - vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &info, nullptr, &pipeline); - - m_program = std::make_unique(*get_current_renderer(), pipeline, m_pipeline_layout); + auto compiler = vk::get_pipe_compiler(); + m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE); declare_inputs(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 0b6a2f790e..5e1f755906 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -473,7 +473,10 @@ VKGSRender::VKGSRender() : GSRender() null_buffer = std::make_unique(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32); + int thread_count = g_cfg.video.shader_compiler_threads_count; + if (!thread_count) thread_count = -1; vk::initialize_compiler_context(); + vk::initialize_pipe_compiler(thread_count); if (g_cfg.video.overlay) 
{ @@ -482,7 +485,7 @@ VKGSRender::VKGSRender() : GSRender() m_text_writer->init(*m_device, vk::get_renderpass(*m_device, key)); } - m_prog_buffer = std::make_unique + m_prog_buffer = std::make_unique ( [this](const vk::pipeline_props& props, const RSXVertexProgram& vp, const RSXFragmentProgram& fp) { @@ -561,8 +564,9 @@ VKGSRender::~VKGSRender() m_texture_cache.destroy(); //Shaders - vk::finalize_compiler_context(); - m_prog_buffer->clear(); + vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled + vk::finalize_compiler_context(); // Shut down the glslang compiler + m_prog_buffer->clear(); // Delete shader objects m_shader_interpreter.destroy(); m_persistent_attribute_storage.reset(); @@ -1629,7 +1633,7 @@ bool VKGSRender::load_program() vertex_program.skip_vertex_input_check = true; fragment_program.unnormalized_coords = 0; m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, - shadermode != shader_mode::recompiler, true, *m_device, pipeline_layout).get(); + shadermode != shader_mode::recompiler, true, *m_device, pipeline_layout); vk::leave_uninterruptible(); @@ -1815,8 +1819,7 @@ void VKGSRender::load_program_env() control_masks[0] = rsx::method_registers.shader_control(); control_masks[1] = current_fragment_program.texture_dimensions; - const auto fp_data = static_cast(current_fragment_program.addr) + current_fp_metadata.program_start_offset; - std::memcpy(fp_buf + 16, fp_data, current_fp_metadata.program_ucode_length); + std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length); m_fragment_instructions_buffer.unmap(); m_fragment_instructions_buffer_info = { m_fragment_instructions_buffer.heap->value, fp_mapping, fp_block_length }; @@ -2499,8 +2502,3 @@ void VKGSRender::end_conditional_rendering() { thread::end_conditional_rendering(); } - -bool VKGSRender::on_decompiler_task() -{ - return m_prog_buffer->async_update(8, *m_device, pipeline_layout).first; -} 
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 9f07565623..1c44da384c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -22,7 +22,7 @@ namespace vk using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache; using null_vertex_cache = vertex_cache; - using shader_cache = rsx::shaders_cache; + using shader_cache = rsx::shaders_cache; struct vertex_upload_info { @@ -390,7 +390,7 @@ public: std::unique_ptr m_shaders_cache; private: - std::unique_ptr m_prog_buffer; + std::unique_ptr m_prog_buffer; std::unique_ptr m_swapchain; vk::context m_thread_context; @@ -573,6 +573,4 @@ protected: bool on_access_violation(u32 address, bool is_writing) override; void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override; void on_semaphore_acquire_wait() override; - - bool on_decompiler_task() override; }; diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index ff32813ea8..054aa1366c 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -6,6 +6,7 @@ #include "VKFramebuffer.h" #include "VKResourceManager.h" #include "VKRenderPass.h" +#include "VKPipelineCompiler.h" #include "../Overlays/overlays.h" @@ -233,7 +234,6 @@ namespace vk vp.scissorCount = 1; vp.viewportCount = 1; - VkPipeline pipeline; VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pVertexInputState = &vi; @@ -251,9 +251,8 @@ namespace vk info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = render_pass; - CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, nullptr, 1, &info, NULL, &pipeline)); - - auto program = std::make_unique(*m_device, pipeline, m_pipeline_layout, get_vertex_inputs(), get_fragment_inputs()); + auto compiler = vk::get_pipe_compiler(); + auto program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), 
get_fragment_inputs()); auto result = program.get(); m_program_cache[storage_key] = std::move(program); diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp new file mode 100644 index 0000000000..fdba500f2a --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -0,0 +1,232 @@ +#include "stdafx.h" +#include "VKPipelineCompiler.h" +#include "VKRenderPass.h" +#include "Utilities/Thread.h" + +#include + +namespace vk +{ + // Global list of worker threads + std::unique_ptr> g_pipe_compilers; + int g_num_pipe_compilers = 0; + atomic_t g_compiler_index{}; + + pipe_compiler::pipe_compiler() + { + // TODO: Initialize workqueue + } + + pipe_compiler::~pipe_compiler() + { + // TODO: Destroy and do cleanup + } + + void pipe_compiler::initialize(const vk::render_device* pdev) + { + m_device = pdev; + } + + void pipe_compiler::operator()() + { + while (thread_ctrl::state() != thread_state::aborting) + { + for (auto&& job : m_work_queue.pop_all()) + { + if (job.is_graphics_job) + { + auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.pipe_layout, job.inputs, {}); + job.callback_func(compiled); + } + else + { + auto compiled = int_compile_compute_pipe(job.compute_data, job.pipe_layout); + job.callback_func(compiled); + } + } + + m_work_queue.wait(); + } + } + + std::unique_ptr pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout) + { + VkPipeline pipeline; + // Use the device bound in initialize() and treat a failed creation as fatal, same as the graphics path + CHECK_RESULT(vkCreateComputePipelines(*m_device, nullptr, 1, &create_info, nullptr, &pipeline)); + return std::make_unique(*m_device, pipeline, pipe_layout); + } + + std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs) + { + VkPipeline pipeline; + CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, nullptr, 1, &create_info, NULL,
&pipeline)); + auto result = std::make_unique(*m_device, pipeline, pipe_layout, vs_inputs, fs_inputs); + result->link(); + return result; + } + + std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs) + { + VkPipelineShaderStageCreateInfo shader_stages[2] = {}; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stages[0].module = modules[0]; + shader_stages[0].pName = "main"; + + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = modules[1]; + shader_stages[1].pName = "main"; + + std::vector dynamic_state_descriptors; + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_VIEWPORT); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_SCISSOR); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_LINE_WIDTH); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE); + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS); + + if (vk::get_current_renderer()->get_depth_bounds_support()) + { + dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + } + + VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; + dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_info.pDynamicStates = dynamic_state_descriptors.data(); + dynamic_state_info.dynamicStateCount = ::size32(dynamic_state_descriptors); + + VkPipelineVertexInputStateCreateInfo vi = { 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.viewportCount = 1; + vp.scissorCount = 1; + + VkPipelineMultisampleStateCreateInfo ms = create_info.state.ms; + verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((create_info.renderpass_key >> 16) & 0xF); + if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) + { + // Update the sample mask pointer + ms.pSampleMask = &create_info.state.temp_storage.msaa_sample_mask; + } + + // Rebase pointers from pipeline structure in case it is moved/copied + VkPipelineColorBlendStateCreateInfo cs = create_info.state.cs; + cs.pAttachments = create_info.state.att_state; + + VkGraphicsPipelineCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + info.pVertexInputState = &vi; + info.pInputAssemblyState = &create_info.state.ia; + info.pRasterizationState = &create_info.state.rs; + info.pColorBlendState = &cs; + info.pMultisampleState = &ms; + info.pViewportState = &vp; + info.pDepthStencilState = &create_info.state.ds; + info.stageCount = 2; + info.pStages = shader_stages; + info.pDynamicState = &dynamic_state_info; + info.layout = pipe_layout; + info.basePipelineIndex = -1; + info.basePipelineHandle = VK_NULL_HANDLE; + info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key); + + return int_compile_graphics_pipe(info, pipe_layout, vs_inputs, fs_inputs); + } + + std::unique_ptr pipe_compiler::compile( + const VkComputePipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback) + { + if (flags == COMPILE_INLINE) + { + return int_compile_compute_pipe(create_info, pipe_layout); + } + + m_work_queue.push(create_info, pipe_layout, callback); + return {}; + } + + std::unique_ptr pipe_compiler::compile( + const VkGraphicsPipelineCreateInfo& create_info, + VkPipelineLayout 
pipe_layout, + op_flags flags, callback_t /*callback*/, + const std::vector& vs_inputs, const std::vector& fs_inputs) + { + // It is very inefficient to defer this as all pointers need to be saved + verify(HERE), flags == COMPILE_INLINE; + return int_compile_graphics_pipe(create_info, pipe_layout, vs_inputs, fs_inputs); + } + + std::unique_ptr pipe_compiler::compile( + const vk::pipeline_props& create_info, + VkShaderModule module_handles[2], + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback, + const std::vector& vs_inputs, const std::vector& fs_inputs) + { + if (flags == COMPILE_INLINE) + { + return int_compile_graphics_pipe(create_info, module_handles, pipe_layout, vs_inputs, fs_inputs); + } + + m_work_queue.push(create_info, pipe_layout, module_handles, vs_inputs, fs_inputs, callback); + return {}; + } + + void initialize_pipe_compiler(int num_worker_threads) + { + if (num_worker_threads == -1) + { + // Select optimal number of compiler threads + const auto hw_threads = std::thread::hardware_concurrency(); + if (hw_threads >= 12) + { + num_worker_threads = 4; + } + else if (hw_threads >= 8) + { + num_worker_threads = 2; + } + else + { + num_worker_threads = 1; + } + } + + verify(HERE), num_worker_threads >= 1; + + const vk::render_device* dev = vk::get_current_renderer(); + verify("Cannot initialize pipe compiler before creating a logical device" HERE), dev; + + // Create the thread pool + g_pipe_compilers = std::make_unique>("RSX.W", num_worker_threads); + g_num_pipe_compilers = num_worker_threads; + + // Initialize the workers. 
At least one inline compiler shall exist (doesn't actually run) + for (pipe_compiler& compiler : *g_pipe_compilers.get()) + { + compiler.initialize(dev); + } + } + + void destroy_pipe_compiler() + { + g_pipe_compilers.reset(); + } + + pipe_compiler* get_pipe_compiler() + { + verify(HERE), g_pipe_compilers; + int thread_index = g_compiler_index++; + + return g_pipe_compilers.get()->begin() + (thread_index % g_num_pipe_compilers); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h new file mode 100644 index 0000000000..1e3f7b71ec --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -0,0 +1,183 @@ +#pragma once +#include "VKHelpers.h" +#include "../rsx_utils.h" +#include "Utilities/hash.h" +#include "Utilities/lockless.h" + +namespace vk +{ + struct pipeline_props + { + graphics_pipeline_state state; + u64 renderpass_key; + + bool operator==(const pipeline_props& other) const + { + if (renderpass_key != other.renderpass_key) + return false; + + if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) + return false; + + if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) + return false; + + if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) + return false; + + // Cannot memcmp cs due to pAttachments being a pointer to memory + if (state.cs.logicOp != other.state.cs.logicOp || + state.cs.logicOpEnable != other.state.cs.logicOpEnable || + memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32))) + return false; + + if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) + return false; + + if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) + { + if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo))) + return false; + + if (state.temp_storage.msaa_sample_mask != other.state.temp_storage.msaa_sample_mask) + 
return false; + } + + return true; + } + }; + + class pipe_compiler + { + public: + enum op_flags + { + COMPILE_DEFAULT = 0, + COMPILE_INLINE = 1, + COMPILE_DEFERRED = 2 + }; + + using callback_t = std::function&)>; + + pipe_compiler(); + ~pipe_compiler(); + + void initialize(const vk::render_device* pdev); + + std::unique_ptr compile( + const VkComputePipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback = {}); + + std::unique_ptr compile( + const VkGraphicsPipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback = {}, + const std::vector& vs_inputs = {}, + const std::vector& fs_inputs = {}); + + std::unique_ptr compile( + const vk::pipeline_props &create_info, + VkShaderModule module_handles[2], + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback = {}, + const std::vector& vs_inputs = {}, + const std::vector& fs_inputs = {}); + + void operator()(); + + private: + class compute_pipeline_props : public VkComputePipelineCreateInfo + { + // Storage for the entry name + std::string entry_name; + + public: + compute_pipeline_props() = default; + compute_pipeline_props(const VkComputePipelineCreateInfo& info) + { + (*static_cast(this)) = info; + entry_name = info.stage.pName; + stage.pName = entry_name.c_str(); + } + }; + + struct pipe_compiler_job + { + bool is_graphics_job; + callback_t callback_func; + + vk::pipeline_props graphics_data; + compute_pipeline_props compute_data; + VkPipelineLayout pipe_layout; + VkShaderModule graphics_modules[2]; + std::vector inputs; + + pipe_compiler_job( + const vk::pipeline_props& props, + VkPipelineLayout layout, + VkShaderModule modules[2], + const std::vector& vs_in, + const std::vector& fs_in, + callback_t func) + { + callback_func = func; + graphics_data = props; + pipe_layout = layout; + graphics_modules[0] = modules[0]; + graphics_modules[1] = modules[1]; + is_graphics_job = true; + + inputs.reserve(vs_in.size() + 
fs_in.size()); + inputs.insert(inputs.end(), vs_in.begin(), vs_in.end()); + inputs.insert(inputs.end(), fs_in.begin(), fs_in.end()); + } + + pipe_compiler_job( + const VkComputePipelineCreateInfo& props, + VkPipelineLayout layout, + callback_t func) + : compute_data(props) // Build in place: assigning from a temporary leaves stage.pName dangling for short entry names. NOTE(review): assumes lf_queue never copies the job - confirm + { + callback_func = func; + pipe_layout = layout; + is_graphics_job = false; + } + }; + + const vk::render_device* m_device = nullptr; + lf_queue m_work_queue; + + std::unique_ptr int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout); + std::unique_ptr int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs); + std::unique_ptr int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs); + }; + + void initialize_pipe_compiler(int num_worker_threads = -1); + void destroy_pipe_compiler(); + pipe_compiler* get_pipe_compiler(); +} + +namespace rpcs3 +{ + template <> + size_t hash_struct(const vk::pipeline_props &pipelineProperties) + { + size_t seed = hash_base(pipelineProperties.renderpass_key); + seed ^= hash_struct(pipelineProperties.state.ia); + seed ^= hash_struct(pipelineProperties.state.ds); + seed ^= hash_struct(pipelineProperties.state.rs); + seed ^= hash_struct(pipelineProperties.state.ms); + seed ^= hash_base(pipelineProperties.state.temp_storage.msaa_sample_mask); + + // Do not compare pointers to memory!
+ VkPipelineColorBlendStateCreateInfo tmp; + memcpy(&tmp, &pipelineProperties.state.cs, sizeof(VkPipelineColorBlendStateCreateInfo)); + tmp.pAttachments = nullptr; + + seed ^= hash_struct(pipelineProperties.state.att_state[0]); + return hash_base(seed); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 5bea8e9ceb..464df3ae8c 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -5,224 +5,106 @@ #include "Utilities/hash.h" #include "VKHelpers.h" #include "VKRenderPass.h" +#include "VKPipelineCompiler.h" namespace vk { - struct pipeline_props + struct VKTraits { - graphics_pipeline_state state; - u64 renderpass_key; + using vertex_program_type = VKVertexProgram; + using fragment_program_type = VKFragmentProgram; + using pipeline_type = vk::glsl::program; + using pipeline_storage_type = std::unique_ptr; + using pipeline_properties = vk::pipeline_props; - bool operator==(const pipeline_props& other) const + static + void recompile_fragment_program(const RSXFragmentProgram& RSXFP, fragment_program_type& fragmentProgramData, size_t ID) { - if (renderpass_key != other.renderpass_key) - return false; + fragmentProgramData.Decompile(RSXFP); + fragmentProgramData.id = static_cast(ID); + fragmentProgramData.Compile(); + } - if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) - return false; + static + void recompile_vertex_program(const RSXVertexProgram& RSXVP, vertex_program_type& vertexProgramData, size_t ID) + { + vertexProgramData.Decompile(RSXVP); + vertexProgramData.id = static_cast(ID); + vertexProgramData.Compile(); + } - if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) - return false; + static + void validate_pipeline_properties(const VKVertexProgram&, const VKFragmentProgram& fp, vk::pipeline_props& properties) + { + //Explicitly disable writing to undefined registers + 
properties.state.att_state[0].colorWriteMask &= fp.output_color_masks[0]; + properties.state.att_state[1].colorWriteMask &= fp.output_color_masks[1]; + properties.state.att_state[2].colorWriteMask &= fp.output_color_masks[2]; + properties.state.att_state[3].colorWriteMask &= fp.output_color_masks[3]; + } - if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) - return false; + static + pipeline_type* build_pipeline( + const vertex_program_type& vertexProgramData, + const fragment_program_type& fragmentProgramData, + const vk::pipeline_props& pipelineProperties, + bool compile_async, + std::function callback, + VkDevice dev, VkPipelineLayout common_pipeline_layout) + { + const auto compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; + VkShaderModule modules[2] = { vertexProgramData.handle, fragmentProgramData.handle }; - // Cannot memcmp cs due to pAttachments being a pointer to memory - if (state.cs.logicOp != other.state.cs.logicOp || - state.cs.logicOpEnable != other.state.cs.logicOpEnable || - memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32))) - return false; + auto compiler = vk::get_pipe_compiler(); + auto result = compiler->compile( + pipelineProperties, modules, common_pipeline_layout, + compiler_flags, callback, + vertexProgramData.uniforms, + fragmentProgramData.uniforms); - if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) - return false; + return callback(result); + } + }; - if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) - { - if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo))) - return false; + struct program_cache : public program_state_cache + { + program_cache(decompiler_callback_t callback) + { + notify_pipeline_compiled = callback; + } - if (state.temp_storage.msaa_sample_mask != other.state.temp_storage.msaa_sample_mask) - return false; - } + u64 
get_hash(const vk::pipeline_props& props) + { + return rpcs3::hash_struct(props); + } - return true; + u64 get_hash(const RSXVertexProgram& prog) + { + return program_hash_util::vertex_program_utils::get_vertex_program_ucode_hash(prog); + } + + u64 get_hash(const RSXFragmentProgram& prog) + { + return program_hash_util::fragment_program_utils::get_fragment_program_ucode_hash(prog); + } + + template + void add_pipeline_entry(RSXVertexProgram& vp, RSXFragmentProgram& fp, vk::pipeline_props& props, Args&& ...args) + { + vp.skip_vertex_input_check = true; + get_graphics_pipeline(vp, fp, props, false, false, std::forward(args)...); + } + + void preload_programs(RSXVertexProgram& vp, RSXFragmentProgram& fp) + { + vp.skip_vertex_input_check = true; + search_vertex_program(vp); + search_fragment_program(fp); + } + + bool check_cache_missed() const + { + return m_cache_miss_flag; } }; } - -namespace rpcs3 -{ - template <> - size_t hash_struct(const vk::pipeline_props &pipelineProperties) - { - size_t seed = hash_base(pipelineProperties.renderpass_key); - seed ^= hash_struct(pipelineProperties.state.ia); - seed ^= hash_struct(pipelineProperties.state.ds); - seed ^= hash_struct(pipelineProperties.state.rs); - seed ^= hash_struct(pipelineProperties.state.ms); - seed ^= hash_base(pipelineProperties.state.temp_storage.msaa_sample_mask); - - // Do not compare pointers to memory! 
- VkPipelineColorBlendStateCreateInfo tmp; - memcpy(&tmp, &pipelineProperties.state.cs, sizeof(VkPipelineColorBlendStateCreateInfo)); - tmp.pAttachments = nullptr; - - seed ^= hash_struct(pipelineProperties.state.att_state[0]); - return hash_base(seed); - } -} - -struct VKTraits -{ - using vertex_program_type = VKVertexProgram; - using fragment_program_type = VKFragmentProgram; - using pipeline_storage_type = std::unique_ptr; - using pipeline_properties = vk::pipeline_props; - - static - void recompile_fragment_program(const RSXFragmentProgram &RSXFP, fragment_program_type& fragmentProgramData, size_t ID) - { - fragmentProgramData.Decompile(RSXFP); - fragmentProgramData.id = static_cast(ID); - fragmentProgramData.Compile(); - } - - static - void recompile_vertex_program(const RSXVertexProgram &RSXVP, vertex_program_type& vertexProgramData, size_t ID) - { - vertexProgramData.Decompile(RSXVP); - vertexProgramData.id = static_cast(ID); - vertexProgramData.Compile(); - } - - static - void validate_pipeline_properties(const VKVertexProgram&, const VKFragmentProgram &fp, vk::pipeline_props& properties) - { - //Explicitly disable writing to undefined registers - properties.state.att_state[0].colorWriteMask &= fp.output_color_masks[0]; - properties.state.att_state[1].colorWriteMask &= fp.output_color_masks[1]; - properties.state.att_state[2].colorWriteMask &= fp.output_color_masks[2]; - properties.state.att_state[3].colorWriteMask &= fp.output_color_masks[3]; - } - - static - pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, - const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) - { - VkPipelineShaderStageCreateInfo shader_stages[2] = {}; - shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = vertexProgramData.handle; - 
shader_stages[0].pName = "main"; - - shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = fragmentProgramData.handle; - shader_stages[1].pName = "main"; - - std::vector dynamic_state_descriptors; - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_VIEWPORT); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_SCISSOR); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_LINE_WIDTH); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE); - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS); - - if (vk::get_current_renderer()->get_depth_bounds_support()) - { - dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); - } - - VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; - dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pDynamicStates = dynamic_state_descriptors.data(); - dynamic_state_info.dynamicStateCount = ::size32(dynamic_state_descriptors); - - VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; - - VkPipelineViewportStateCreateInfo vp = {}; - vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - vp.viewportCount = 1; - vp.scissorCount = 1; - - VkPipelineMultisampleStateCreateInfo ms = pipelineProperties.state.ms; - verify("Multisample state mismatch!" 
HERE), ms.rasterizationSamples == VkSampleCountFlagBits((pipelineProperties.renderpass_key >> 16) & 0xF); - if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) - { - // Update the sample mask pointer - ms.pSampleMask = &pipelineProperties.state.temp_storage.msaa_sample_mask; - } - - // Rebase pointers from pipeline structure in case it is moved/copied - VkPipelineColorBlendStateCreateInfo cs = pipelineProperties.state.cs; - cs.pAttachments = pipelineProperties.state.att_state; - - VkPipeline pipeline; - VkGraphicsPipelineCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - info.pVertexInputState = &vi; - info.pInputAssemblyState = &pipelineProperties.state.ia; - info.pRasterizationState = &pipelineProperties.state.rs; - info.pColorBlendState = &cs; - info.pMultisampleState = &ms; - info.pViewportState = &vp; - info.pDepthStencilState = &pipelineProperties.state.ds; - info.stageCount = 2; - info.pStages = shader_stages; - info.pDynamicState = &dynamic_state_info; - info.layout = common_pipeline_layout; - info.basePipelineIndex = -1; - info.basePipelineHandle = VK_NULL_HANDLE; - info.renderPass = vk::get_renderpass(dev, pipelineProperties.renderpass_key); - - CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); - - pipeline_storage_type result = std::make_unique(dev, pipeline, common_pipeline_layout, vertexProgramData.uniforms, fragmentProgramData.uniforms); - result->link(); - return result; - } -}; - -struct VKProgramBuffer : public program_state_cache -{ - VKProgramBuffer(decompiler_callback_t callback) - { - notify_pipeline_compiled = callback; - } - - u64 get_hash(const vk::pipeline_props &props) - { - return rpcs3::hash_struct(props); - } - - u64 get_hash(const RSXVertexProgram &prog) - { - return program_hash_util::vertex_program_utils::get_vertex_program_ucode_hash(prog); - } - - u64 get_hash(const RSXFragmentProgram &prog) - { - return 
program_hash_util::fragment_program_utils::get_fragment_program_ucode_hash(prog); - } - - template - void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, vk::pipeline_props &props, Args&& ...args) - { - vp.skip_vertex_input_check = true; - get_graphics_pipeline(vp, fp, props, false, false, std::forward(args)...); - } - - void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp) - { - vp.skip_vertex_input_check = true; - search_vertex_program(vp); - search_fragment_program(fp); - } - - bool check_cache_missed() const - { - return m_cache_miss_flag; - } -}; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index d824572371..5094734f4e 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -484,7 +484,6 @@ namespace vk VkPipelineColorBlendStateCreateInfo cs = properties.state.cs; cs.pAttachments = properties.state.att_state; - VkPipeline pipeline; VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pVertexInputState = &vi; @@ -502,8 +501,9 @@ namespace vk info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); - CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &info, NULL, &pipeline)); - return new vk::glsl::program(m_device, pipeline, m_shared_pipeline_layout, m_vs_inputs, m_fs_inputs); + auto compiler = vk::get_pipe_compiler(); + auto program = compiler->compile(info, m_shared_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); + return program.release(); } void shader_interpreter::update_fragment_textures(const std::array& sampled_images, VkDescriptorSet descriptor_set) diff --git a/rpcs3/Emu/RSX/VK/VKTextOut.h b/rpcs3/Emu/RSX/VK/VKTextOut.h index aa44c5d823..00177bcfe7 100644 --- a/rpcs3/Emu/RSX/VK/VKTextOut.h +++ b/rpcs3/Emu/RSX/VK/VKTextOut.h @@ -3,6 +3,7 @@ #include 
"VKVertexProgram.h" #include "VKFragmentProgram.h" #include "VKRenderPass.h" +#include "VKPipelineCompiler.h" #include "../Common/TextGlyphs.h" namespace vk @@ -176,7 +177,6 @@ namespace vk VkPipelineDepthStencilStateCreateInfo ds = {}; ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - VkPipeline pipeline; VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pVertexInputState = &vi; @@ -194,10 +194,8 @@ namespace vk info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = m_render_pass; - CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); - - const std::vector unused; - m_program = std::make_unique(static_cast(dev), pipeline, m_pipeline_layout, unused, unused); + auto compiler = vk::get_pipe_compiler(); + m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE); } void load_program(vk::command_buffer &cmd, float scale_x, float scale_y, const float *offsets, size_t nb_offsets, std::array color) diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 2ee7bf6ae3..85b201cd1f 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -645,7 +645,7 @@ namespace rsx if (!fs::is_file(fp_name)) { - fs::file(fp_name, fs::rewrite).write(fp.addr, fp.ucode_length); + fs::file(fp_name, fs::rewrite).write(fp.get_data(), fp.ucode_length); } if (!fs::is_file(vp_name)) @@ -700,7 +700,7 @@ namespace rsx { std::lock_guard lock(fpd_mutex); fragment_program_data[program_hash] = data; - fp.addr = fragment_program_data[program_hash].data(); + fp.data = fragment_program_data[program_hash].data(); } fp.ucode_length = ::size32(data); diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 7be132b865..9574b6c027 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -152,6 +152,7 @@ struct cfg_root : cfg::node cfg::_int<50, 800> resolution_scale_percent{ this, "Resolution Scale", 100 
}; cfg::_int<0, 16> anisotropic_level_override{ this, "Anisotropic Filter Override", 0, true }; cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 }; + cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 }; cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true }; cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true }; cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 2596ac630f..f49cbef01e 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -65,8 +65,6 @@ - - MaxSpeed @@ -80,6 +78,7 @@ + @@ -99,6 +98,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index aacfb71395..8d24eb74ba 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -13,6 +13,7 @@ + @@ -31,5 +32,6 @@ + - + \ No newline at end of file diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 4fda1c4b34..3f3eabafce 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -33,6 +33,7 @@ + @@ -55,6 +56,7 @@ + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 1e758287bd..d9ff7e0d59 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -21,6 +21,7 @@ + @@ -44,5 +45,6 @@ + \ No newline at end of file