rsx: Use multithreaded shader compiler backend

This commit is contained in:
kd-11 2020-10-27 23:41:20 +03:00 committed by kd-11
parent e89a568765
commit 3ddfa288cf
30 changed files with 1065 additions and 580 deletions

View File

@ -422,6 +422,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/GLFragmentProgram.cpp RSX/GL/GLFragmentProgram.cpp
RSX/GL/GLGSRender.cpp RSX/GL/GLGSRender.cpp
RSX/GL/GLHelpers.cpp RSX/GL/GLHelpers.cpp
RSX/GL/GLPipelineCompiler.cpp
RSX/GL/GLPresent.cpp RSX/GL/GLPresent.cpp
RSX/GL/GLRenderTargets.cpp RSX/GL/GLRenderTargets.cpp
RSX/GL/GLShaderInterpreter.cpp RSX/GL/GLShaderInterpreter.cpp
@ -443,6 +444,7 @@ if(TARGET 3rdparty_vulkan)
RSX/VK/VKGSRender.cpp RSX/VK/VKGSRender.cpp
RSX/VK/VKHelpers.cpp RSX/VK/VKHelpers.cpp
RSX/VK/VKMemAlloc.cpp RSX/VK/VKMemAlloc.cpp
RSX/VK/VKPipelineCompiler.cpp
RSX/VK/VKPresent.cpp RSX/VK/VKPresent.cpp
RSX/VK/VKProgramPipeline.cpp RSX/VK/VKProgramPipeline.cpp
RSX/VK/VKQueryPool.cpp RSX/VK/VKQueryPool.cpp

View File

@ -310,7 +310,7 @@ public:
u32 ctrl = (vmfprog.outputFromH0 ? 0 : 0x40) | (vmfprog.depthReplace ? 0xe : 0); u32 ctrl = (vmfprog.outputFromH0 ? 0 : 0x40) | (vmfprog.depthReplace ? 0xe : 0);
std::vector<rsx::texture_dimension_extended> td; std::vector<rsx::texture_dimension_extended> td;
RSXFragmentProgram prog; RSXFragmentProgram prog;
prog.ucode_length = 0, prog.addr = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl; prog.ucode_length = 0, prog.data = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl;
GLFragmentDecompilerThread(m_glsl_shader, param_array, prog, size).Task(); GLFragmentDecompilerThread(m_glsl_shader, param_array, prog, size).Task();
vm::close(); vm::close();
} }

View File

@ -217,7 +217,7 @@ std::string FragmentProgramDecompiler::AddConst()
return name; return name;
} }
auto data = reinterpret_cast<be_t<u32>*>(static_cast<char*>(m_prog.addr) + m_size + 4 * sizeof(u32)); auto data = reinterpret_cast<be_t<u32>*>(static_cast<char*>(m_prog.get_data()) + m_size + 4 * sizeof(u32));
m_offset = 2 * 4 * sizeof(u32); m_offset = 2 * 4 * sizeof(u32);
u32 x = GetData(data[0]); u32 x = GetData(data[0]);
u32 y = GetData(data[1]); u32 y = GetData(data[1]);
@ -1118,7 +1118,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
std::string FragmentProgramDecompiler::Decompile() std::string FragmentProgramDecompiler::Decompile()
{ {
auto data = static_cast<be_t<u32>*>(m_prog.addr); auto data = static_cast<be_t<u32>*>(m_prog.get_data());
m_size = 0; m_size = 0;
m_location = 0; m_location = 0;
m_loop_count = 0; m_loop_count = 0;

View File

@ -423,7 +423,7 @@ size_t fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragment
{ {
// 64-bit Fowler/Noll/Vo FNV-1a hash code // 64-bit Fowler/Noll/Vo FNV-1a hash code
size_t hash = 0xCBF29CE484222325ULL; size_t hash = 0xCBF29CE484222325ULL;
const void* instbuffer = program.addr; const void* instbuffer = program.get_data();
size_t instIndex = 0; size_t instIndex = 0;
while (true) while (true)
{ {
@ -475,8 +475,8 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
return false; return false;
} }
const void* instBuffer1 = binary1.addr; const void* instBuffer1 = binary1.get_data();
const void* instBuffer2 = binary2.addr; const void* instBuffer2 = binary2.get_data();
size_t instIndex = 0; size_t instIndex = 0;
while (true) while (true)
{ {

View File

@ -99,6 +99,7 @@ template<typename backend_traits>
class program_state_cache class program_state_cache
{ {
using pipeline_storage_type = typename backend_traits::pipeline_storage_type; using pipeline_storage_type = typename backend_traits::pipeline_storage_type;
using pipeline_type = typename backend_traits::pipeline_type;
using pipeline_properties = typename backend_traits::pipeline_properties; using pipeline_properties = typename backend_traits::pipeline_properties;
using vertex_program_type = typename backend_traits::vertex_program_type; using vertex_program_type = typename backend_traits::vertex_program_type;
using fragment_program_type = typename backend_traits::fragment_program_type; using fragment_program_type = typename backend_traits::fragment_program_type;
@ -133,23 +134,6 @@ class program_state_cache
} }
}; };
struct async_decompiler_job
{
RSXVertexProgram vertex_program;
RSXFragmentProgram fragment_program;
pipeline_properties properties;
std::vector<u8> local_storage;
async_decompiler_job(RSXVertexProgram v, const RSXFragmentProgram f, pipeline_properties p) :
vertex_program(std::move(v)), fragment_program(f), properties(std::move(p))
{
local_storage.resize(fragment_program.ucode_length);
std::memcpy(local_storage.data(), fragment_program.addr, fragment_program.ucode_length);
fragment_program.addr = local_storage.data();
}
};
protected: protected:
using decompiler_callback_t = std::function<void(const pipeline_properties&, const RSXVertexProgram&, const RSXFragmentProgram&)>; using decompiler_callback_t = std::function<void(const pipeline_properties&, const RSXVertexProgram&, const RSXFragmentProgram&)>;
@ -165,8 +149,6 @@ protected:
binary_to_fragment_program m_fragment_shader_cache; binary_to_fragment_program m_fragment_shader_cache;
std::unordered_map<pipeline_key, pipeline_storage_type, pipeline_key_hash, pipeline_key_compare> m_storage; std::unordered_map<pipeline_key, pipeline_storage_type, pipeline_key_hash, pipeline_key_compare> m_storage;
std::deque<async_decompiler_job> m_decompile_queue;
std::unordered_map<pipeline_key, bool, pipeline_key_hash, pipeline_key_compare> m_decompiler_map;
decompiler_callback_t notify_pipeline_compiled; decompiler_callback_t notify_pipeline_compiled;
vertex_program_type __null_vertex_program; vertex_program_type __null_vertex_program;
@ -213,7 +195,6 @@ protected:
{ {
bool recompile = false; bool recompile = false;
fragment_program_type* new_shader; fragment_program_type* new_shader;
void* fragment_program_ucode_copy;
{ {
reader_lock lock(m_fragment_mutex); reader_lock lock(m_fragment_mutex);
@ -229,27 +210,17 @@ protected:
} }
rsx_log.notice("FP not found in buffer!"); rsx_log.notice("FP not found in buffer!");
fragment_program_ucode_copy = malloc(rsx_fp.ucode_length);
verify("malloc() failed!" HERE), fragment_program_ucode_copy;
std::memcpy(fragment_program_ucode_copy, rsx_fp.addr, rsx_fp.ucode_length);
RSXFragmentProgram new_fp_key = rsx_fp;
new_fp_key.addr = fragment_program_ucode_copy;
lock.upgrade(); lock.upgrade();
auto [it, inserted] = m_fragment_shader_cache.try_emplace(new_fp_key); auto [it, inserted] = m_fragment_shader_cache.try_emplace(rsx_fp);
new_shader = &(it->second); new_shader = &(it->second);
recompile = inserted; recompile = inserted;
}
if (recompile) if (inserted)
{ {
it->first.clone_data();
backend_traits::recompile_fragment_program(rsx_fp, *new_shader, m_next_id++); backend_traits::recompile_fragment_program(rsx_fp, *new_shader, m_next_id++);
} }
else
{
free(fragment_program_ucode_copy);
} }
return std::forward_as_tuple(*new_shader, false); return std::forward_as_tuple(*new_shader, false);
@ -330,94 +301,18 @@ public:
~program_state_cache() ~program_state_cache()
{} {}
// Returns 2 booleans.
// First flag hints that there is more work to do (busy hint)
// Second flag is true if at least one program has been linked successfully (sync hint)
template<typename... Args> template<typename... Args>
std::pair<bool, bool> async_update(u32 max_decompile_count, Args&& ...args) pipeline_type* get_graphics_pipeline(
{
// Decompile shaders and link one pipeline object per 'run'
// NOTE: Linking is much slower than decompilation step, so always decompile at least 1 unit
// TODO: Use try_lock instead
bool busy = false;
bool sync = false;
u32 count = 0;
while (true)
{
{
reader_lock lock(m_decompiler_mutex);
if (m_decompile_queue.empty())
{
break;
}
}
// Decompile
const auto& vp_search = search_vertex_program(m_decompile_queue.front().vertex_program, true);
const auto& fp_search = search_fragment_program(m_decompile_queue.front().fragment_program, true);
const bool already_existing_fragment_program = std::get<1>(fp_search);
const bool already_existing_vertex_program = std::get<1>(vp_search);
const vertex_program_type& vertex_program = std::get<0>(vp_search);
const fragment_program_type& fragment_program = std::get<0>(fp_search);
const pipeline_key key = { vertex_program.id, fragment_program.id, m_decompile_queue.front().properties };
// Retest
bool found = false;
if (already_existing_vertex_program && already_existing_fragment_program)
{
if (auto I = m_storage.find(key); I != m_storage.end())
{
found = true;
}
}
if (!found)
{
pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, m_decompile_queue.front().properties, std::forward<Args>(args)...);
rsx_log.success("New program compiled successfully");
sync = true;
if (notify_pipeline_compiled)
{
notify_pipeline_compiled(m_decompile_queue.front().properties, m_decompile_queue.front().vertex_program, m_decompile_queue.front().fragment_program);
}
std::scoped_lock lock(m_pipeline_mutex);
m_storage[key] = std::move(pipeline);
}
{
std::scoped_lock lock(m_decompiler_mutex);
m_decompile_queue.pop_front();
m_decompiler_map.erase(key);
}
if (++count >= max_decompile_count)
{
// Allows configurable decompiler 'load'
// Smaller unit count will release locks faster
busy = true;
break;
}
}
return { busy, sync };
}
template<typename... Args>
pipeline_storage_type& get_graphics_pipeline(
const RSXVertexProgram& vertexShader, const RSXVertexProgram& vertexShader,
const RSXFragmentProgram& fragmentShader, const RSXFragmentProgram& fragmentShader,
pipeline_properties& pipelineProperties, pipeline_properties& pipelineProperties,
bool allow_async, bool compile_async,
bool allow_notification, bool allow_notification,
Args&& ...args Args&& ...args
) )
{ {
const auto &vp_search = search_vertex_program(vertexShader, !allow_async); const auto &vp_search = search_vertex_program(vertexShader);
const auto &fp_search = search_fragment_program(fragmentShader, !allow_async); const auto &fp_search = search_fragment_program(fragmentShader);
const bool already_existing_fragment_program = std::get<1>(fp_search); const bool already_existing_fragment_program = std::get<1>(fp_search);
const bool already_existing_vertex_program = std::get<1>(vp_search); const bool already_existing_vertex_program = std::get<1>(vp_search);
@ -427,62 +322,79 @@ public:
m_cache_miss_flag = true; m_cache_miss_flag = true;
if (!allow_async || (already_existing_vertex_program && already_existing_fragment_program)) if (already_existing_vertex_program && already_existing_fragment_program)
{ {
// There is a high chance the pipeline object was compiled if the two shaders already existed before
backend_traits::validate_pipeline_properties(vertex_program, fragment_program, pipelineProperties); backend_traits::validate_pipeline_properties(vertex_program, fragment_program, pipelineProperties);
{
reader_lock lock(m_pipeline_mutex); reader_lock lock(m_pipeline_mutex);
if (const auto I = m_storage.find(key); I != m_storage.end()) if (const auto I = m_storage.find(key); I != m_storage.end())
{ {
m_cache_miss_flag = false; m_cache_miss_flag = (I->second == __null_pipeline_handle);
return I->second; return I->second.get();
} }
} }
if (!allow_async)
{ {
std::lock_guard lock(m_pipeline_mutex);
// Check if another submission completed in the mean time
if (const auto I = m_storage.find(key); I != m_storage.end())
{
m_cache_miss_flag = (I->second == __null_pipeline_handle);
return I->second.get();
}
// Insert a placeholder if the key still doesn't exist to avoid re-linking of the same pipeline
m_storage[key] = std::move(__null_pipeline_handle);
}
rsx_log.notice("Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id); rsx_log.notice("Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id);
pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
if (allow_notification && notify_pipeline_compiled) std::function<pipeline_type* (pipeline_storage_type&)> callback;
if (allow_notification)
{ {
notify_pipeline_compiled(pipelineProperties, vertexShader, fragmentShader); callback = [this, vertexShader, fragmentShader_ = RSXFragmentProgram::clone(fragmentShader), key]
rsx_log.success("New program compiled successfully"); (pipeline_storage_type& pipeline) -> pipeline_type*
{
if (!pipeline)
{
return nullptr;
}
rsx_log.success("Program compiled successfully");
notify_pipeline_compiled(key.properties, vertexShader, fragmentShader_);
std::lock_guard lock(m_pipeline_mutex);
auto& pipe_result = m_storage[key];
pipe_result = std::move(pipeline);
return pipe_result.get();
};
}
else
{
callback = [this, key](pipeline_storage_type& pipeline) -> pipeline_type*
{
if (!pipeline)
{
return nullptr;
} }
std::lock_guard lock(m_pipeline_mutex); std::lock_guard lock(m_pipeline_mutex);
auto &rtn = m_storage[key] = std::move(pipeline); auto& pipe_result = m_storage[key];
return rtn; pipe_result = std::move(pipeline);
} return pipe_result.get();
};
} }
verify(HERE), allow_async; return backend_traits::build_pipeline(
vertex_program, // VS, must already be decompiled and recompiled above
std::scoped_lock lock(m_decompiler_mutex, m_pipeline_mutex); fragment_program, // FS, must already be decompiled and recompiled above
pipelineProperties, // Pipeline state
// Rechecks compile_async, // Allow asynchronous compilation
if (already_existing_vertex_program && already_existing_fragment_program) callback, // Insertion and notification callback
{ std::forward<Args>(args)...); // Other arguments
if (const auto I = m_storage.find(key); I != m_storage.end())
{
m_cache_miss_flag = false;
return I->second;
}
if (const auto I = m_decompiler_map.find(key); I != m_decompiler_map.end())
{
// Already in queue
return __null_pipeline_handle;
}
m_decompiler_map[key] = true;
}
// Enqueue if not already queued
m_decompile_queue.emplace_back(vertexShader, fragmentShader, pipelineProperties);
return __null_pipeline_handle;
} }
void fill_fragment_constants_buffer(gsl::span<f32> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize = false) const void fill_fragment_constants_buffer(gsl::span<f32> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize = false) const
@ -497,7 +409,7 @@ public:
alignas(16) f32 tmp[4]; alignas(16) f32 tmp[4];
for (size_t offset_in_fragment_program : I->second.FragmentConstantOffsetCache) for (size_t offset_in_fragment_program : I->second.FragmentConstantOffsetCache)
{ {
char* data = static_cast<char*>(fragment_program.addr) + offset_in_fragment_program; char* data = static_cast<char*>(fragment_program.get_data()) + offset_in_fragment_program;
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data)); const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8)); const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));
@ -546,11 +458,6 @@ public:
{ {
std::scoped_lock lock(m_vertex_mutex, m_fragment_mutex, m_decompiler_mutex, m_pipeline_mutex); std::scoped_lock lock(m_vertex_mutex, m_fragment_mutex, m_decompiler_mutex, m_pipeline_mutex);
for (auto& pair : m_fragment_shader_cache)
{
free(pair.first.addr);
}
notify_pipeline_compiled = {}; notify_pipeline_compiled = {};
m_fragment_shader_cache.clear(); m_fragment_shader_cache.clear();
m_vertex_shader_cache.clear(); m_vertex_shader_cache.clear();

View File

@ -59,10 +59,35 @@ void GLGSRender::on_init_thread()
m_context = m_frame->make_context(); m_context = m_frame->make_context();
const auto shadermode = g_cfg.video.shadermode.get(); const auto shadermode = g_cfg.video.shadermode.get();
if (shadermode != shader_mode::recompiler)
if (shadermode == shader_mode::async_recompiler || shadermode == shader_mode::async_with_interpreter)
{ {
m_decompiler_context = m_frame->make_context(); auto context_create_func = [m_frame = m_frame]()
{
return m_frame->make_context();
};
auto context_bind_func = [m_frame = m_frame](draw_context_t ctx)
{
m_frame->set_current(ctx);
};
auto context_destroy_func = [m_frame = m_frame](draw_context_t ctx)
{
m_frame->delete_context(ctx);
};
int thread_count = g_cfg.video.shader_compiler_threads_count;
if (!thread_count) thread_count = -1;
gl::initialize_pipe_compiler(context_create_func, context_bind_func, context_destroy_func, thread_count);
}
else
{
auto null_context_create_func = []() -> draw_context_t
{
return nullptr;
};
gl::initialize_pipe_compiler(null_context_create_func, {}, {}, 1);
} }
// Bind primary context to main RSX thread // Bind primary context to main RSX thread
@ -342,6 +367,8 @@ void GLGSRender::on_exit()
gl::g_typeless_transfer_buffer.remove(); gl::g_typeless_transfer_buffer.remove();
} }
gl::destroy_pipe_compiler();
m_prog_buffer.clear(); m_prog_buffer.clear();
m_rtts.destroy(); m_rtts.destroy();
@ -653,7 +680,7 @@ bool GLGSRender::load_program()
{ {
void* pipeline_properties = nullptr; void* pipeline_properties = nullptr;
m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties, m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
shadermode != shader_mode::recompiler, true).get(); shadermode != shader_mode::recompiler, true);
if (m_prog_buffer.check_cache_missed()) if (m_prog_buffer.check_cache_missed())
{ {
@ -838,8 +865,7 @@ void GLGSRender::load_program_env()
// Bind textures // Bind textures
m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast<u32*>(fp_buf + 16)); m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast<u32*>(fp_buf + 16));
const auto fp_data = static_cast<u8*>(current_fragment_program.addr) + current_fp_metadata.program_start_offset; std::memcpy(fp_buf + 80, current_fragment_program.get_data(), current_fragment_program.ucode_length);
std::memcpy(fp_buf + 80, fp_data, current_fp_metadata.program_ucode_length);
m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length); m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length);
m_fragment_instructions_buffer->notify(); m_fragment_instructions_buffer->notify();
@ -1072,20 +1098,3 @@ void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que
glEndQuery(GL_ANY_SAMPLES_PASSED); glEndQuery(GL_ANY_SAMPLES_PASSED);
} }
} }
void GLGSRender::on_decompiler_init()
{
// Bind decompiler context to this thread
m_frame->set_current(m_decompiler_context);
}
void GLGSRender::on_decompiler_exit()
{
// Cleanup
m_frame->delete_context(m_decompiler_context);
}
bool GLGSRender::on_decompiler_task()
{
return m_prog_buffer.async_update(8).first;
}

View File

@ -121,7 +121,6 @@ private:
std::list<gl::work_item> work_queue; std::list<gl::work_item> work_queue;
GLProgramBuffer m_prog_buffer; GLProgramBuffer m_prog_buffer;
draw_context_t m_decompiler_context;
//buffer //buffer
gl::fbo* m_draw_fbo = nullptr; gl::fbo* m_draw_fbo = nullptr;
@ -198,8 +197,4 @@ protected:
std::array<std::vector<std::byte>, 4> copy_render_targets_to_memory() override; std::array<std::vector<std::byte>, 4> copy_render_targets_to_memory() override;
std::array<std::vector<std::byte>, 2> copy_depth_stencil_buffer_to_memory() override; std::array<std::vector<std::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
void on_decompiler_init() override;
void on_decompiler_exit() override;
bool on_decompiler_task() override;
}; };

View File

@ -23,6 +23,18 @@ namespace gl
return s_tls_primary_context_thread; return s_tls_primary_context_thread;
} }
// Pushes pending GL commands towards the driver after a fence has been created.
// On the primary context thread the fence itself is polled through check_signaled()
// (its result is intentionally discarded); on any other thread glFlush() is issued.
void flush_command_queue(fence& fence_obj)
{
	if (!is_primary_context_thread())
	{
		// Secondary context: an explicit flush is issued instead of touching the fence
		glFlush();
		return;
	}

	fence_obj.check_signaled();
}
GLenum draw_mode(rsx::primitive_type in) GLenum draw_mode(rsx::primitive_type in)
{ {
switch (in) switch (in)

View File

@ -51,12 +51,15 @@ namespace gl
else\ else\
gl##func##EXT(texture_name, target, __VA_ARGS__); gl##func##EXT(texture_name, target, __VA_ARGS__);
class fence;
void enable_debugging(); void enable_debugging();
bool is_primitive_native(rsx::primitive_type in); bool is_primitive_native(rsx::primitive_type in);
GLenum draw_mode(rsx::primitive_type in); GLenum draw_mode(rsx::primitive_type in);
void set_primary_context_thread(bool = true); void set_primary_context_thread(bool = true);
bool is_primary_context_thread(); bool is_primary_context_thread();
void flush_command_queue(fence& fence_obj);
// Texture helpers // Texture helpers
std::array<GLenum, 4> apply_swizzle_remap(const std::array<GLenum, 4>& swizzle_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap); std::array<GLenum, 4> apply_swizzle_remap(const std::array<GLenum, 4>& swizzle_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap);
@ -76,8 +79,8 @@ namespace gl
class fence class fence
{ {
GLsync m_value = nullptr; GLsync m_value = nullptr;
GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
bool signaled = false; mutable bool signaled = false;
public: public:
@ -104,12 +107,12 @@ namespace gl
create(); create();
} }
bool is_empty() bool is_empty() const
{ {
return (m_value == nullptr); return (m_value == nullptr);
} }
bool check_signaled() bool check_signaled() const
{ {
verify(HERE), m_value != nullptr; verify(HERE), m_value != nullptr;
@ -2222,6 +2225,8 @@ public:
::glsl::program_domain type; ::glsl::program_domain type;
GLuint m_id = GL_NONE; GLuint m_id = GL_NONE;
fence m_compiled_fence;
public: public:
shader() = default; shader() = default;
@ -2245,10 +2250,7 @@ public:
{ {
type = type_; type = type_;
source = src; source = src;
}
shader& compile()
{
GLenum shader_type; GLenum shader_type;
switch (type) switch (type)
{ {
@ -2266,6 +2268,10 @@ public:
} }
m_id = glCreateShader(shader_type); m_id = glCreateShader(shader_type);
}
shader& compile()
{
const char* str = source.c_str(); const char* str = source.c_str();
const GLint length = ::narrow<GLint>(source.length()); const GLint length = ::narrow<GLint>(source.length());
@ -2310,6 +2316,8 @@ public:
rsx_log.fatal("Compilation failed: %s", error_msg); rsx_log.fatal("Compilation failed: %s", error_msg);
} }
m_compiled_fence.create();
flush_command_queue(m_compiled_fence);
return *this; return *this;
} }
@ -2332,6 +2340,11 @@ public:
return source; return source;
} }
// Returns a copy of m_compiled_fence, the fence that compile() creates once compilation has been issued.
// NOTE(review): the fence is returned by value; presumably the copy refers to the same GL sync object - confirm fence copy semantics.
fence get_compile_fence_sync() const
{
return m_compiled_fence;
}
void set_id(uint id) void set_id(uint id)
{ {
m_id = id; m_id = id;
@ -2348,6 +2361,19 @@ public:
} }
}; };
// Wraps an existing shader id in a shader object for the duration of a call (e.g. program attach).
class shader_view : public shader
{
public:
	shader_view(GLuint id)
		: shader(id)
	{}

	~shader_view()
	{
		// Clear the id before the base destructor runs.
		// NOTE(review): presumably this stops the base class from releasing a handle the view does not own - confirm against shader's destructor.
		set_id(0);
	}
};
class program class program
{ {
GLuint m_id = GL_NONE; GLuint m_id = GL_NONE;
@ -2548,11 +2574,7 @@ public:
} }
m_fence.create(); m_fence.create();
flush_command_queue(m_fence);
if (!is_primary_context_thread())
{
glFlush();
}
} }
} }
@ -2637,18 +2659,6 @@ public:
return glGetUniformLocation(m_id, name.c_str()); return glGetUniformLocation(m_id, name.c_str());
} }
program& operator += (const shader& rhs)
{
return attach(rhs);
}
program& operator += (std::initializer_list<shader> shaders)
{
for (auto &shader : shaders)
*this += shader;
return *this;
}
program() = default; program() = default;
program(const program&) = delete; program(const program&) = delete;
program(program&& program_) program(program&& program_)
@ -2683,19 +2693,6 @@ public:
} }
}; };
class shader_view : public shader
{
public:
shader_view(GLuint id) : shader(id)
{
}
~shader_view()
{
set_id(0);
}
};
class program_view : public program class program_view : public program
{ {
public: public:

View File

@ -0,0 +1,149 @@
#include "stdafx.h"
#include "GLPipelineCompiler.h"
#include "Utilities/Thread.h"
#include <thread>
namespace gl
{
// Global list of worker threads
// Created by initialize_pipe_compiler() and released by destroy_pipe_compiler()
std::unique_ptr<named_thread_group<pipe_compiler>> g_pipe_compilers;
// Number of workers in the pool, recorded by initialize_pipe_compiler()
int g_num_pipe_compilers = 0;
// Running counter that get_pipe_compiler() increments to pick the next worker
atomic_t<int> g_compiler_index{};
// Nothing to set up here: members carry their own default initializers and
// the context/callbacks are installed later through initialize().
pipe_compiler::pipe_compiler() = default;
// Hands the worker's context back to the destroy callback, if one was installed.
pipe_compiler::~pipe_compiler()
{
	if (!m_context_destroy_func)
	{
		// No destroy callback was provided, nothing to release
		return;
	}

	m_context_destroy_func(m_context);
}
// Installs the context management callbacks and creates this worker's context.
//  context_create_func  - invoked immediately; its return value becomes m_context
//  context_bind_func    - stored; invoked by operator()() before the first job is processed
//  context_destroy_func - stored; invoked by the destructor when non-empty
void pipe_compiler::initialize(
	std::function<draw_context_t()> context_create_func,
	std::function<void(draw_context_t)> context_bind_func,
	std::function<void(draw_context_t)> context_destroy_func)
{
	// The functors are received by value, so move them into place instead of copying them a second time
	m_context_bind_func = std::move(context_bind_func);
	m_context_destroy_func = std::move(context_destroy_func);

	m_context = context_create_func();
}
// Worker thread entry point.
// Drains the job queue until the thread is asked to abort. For every job the program is
// built through int_compile_graphics_pipe() and then handed to the job's completion callback.
void pipe_compiler::operator()()
{
	while (thread_ctrl::state() != thread_state::aborting)
	{
		for (auto&& job : m_work_queue.pop_all())
		{
			if (m_context_ready.compare_and_swap_test(false, true))
			{
				// Bind context on first use
				m_context_bind_func(m_context);
			}

			auto result = int_compile_graphics_pipe(
				job.vp_handle, job.fp_handle,
				job.post_create_func,
				job.post_link_func);

			// compile() declares the completion callback as optional (defaults to an empty functor).
			// Invoking an empty std::function throws std::bad_function_call, so only call it when set;
			// without a consumer the program is simply released when 'result' goes out of scope.
			if (job.completion_callback)
			{
				job.completion_callback(result);
			}
		}

		// Sleep until more work is queued
		m_work_queue.wait();
	}
}
// Builds a program either on the calling thread or on this worker.
// With COMPILE_INLINE the program is built immediately and returned.
// With any other flag value a job is queued for the worker and an empty pointer is returned;
// the finished program is then delivered through completion_callback_func.
std::unique_ptr<glsl::program> pipe_compiler::compile(
	GLuint vp_handle, GLuint fp_handle,
	op_flags flags,
	callback_t post_create_func,
	callback_t post_link_func,
	callback_t completion_callback_func)
{
	if (flags != COMPILE_INLINE)
	{
		// Deferred path: the worker thread picks this up in operator()()
		m_work_queue.push(vp_handle, fp_handle, post_create_func, post_link_func, completion_callback_func);
		return {};
	}

	return int_compile_graphics_pipe(vp_handle, fp_handle, post_create_func, post_link_func);
}
// Creates and links a program object, running the optional hooks around the link step.
// The shader handles are not used directly here; callers attach shaders from post_create_func.
//  post_create_func - called after create() and before link(), when non-empty
//  post_link_func   - called after link(), when non-empty
std::unique_ptr<glsl::program> pipe_compiler::int_compile_graphics_pipe(
	GLuint vp_handle, GLuint fp_handle,
	callback_t post_create_func,
	callback_t post_link_func)
{
	auto program = std::make_unique<glsl::program>();

	// Stage 1: create the object and let the caller configure it
	program->create();
	if (post_create_func)
	{
		post_create_func(program);
	}

	// Stage 2: link, then let the caller finish the setup
	program->link();
	if (post_link_func)
	{
		post_link_func(program);
	}

	return program;
}
void initialize_pipe_compiler(
std::function<draw_context_t()> context_create_func,
std::function<void(draw_context_t)> context_bind_func,
std::function<void(draw_context_t)> context_destroy_func,
int num_worker_threads)
{
if (num_worker_threads == -1)
{
// Select optimal number of compiler threads
const auto hw_threads = std::thread::hardware_concurrency();
if (hw_threads >= 12)
{
num_worker_threads = 4;
}
else if (hw_threads >= 8)
{
num_worker_threads = 2;
}
else
{
num_worker_threads = 1;
}
}
verify(HERE), num_worker_threads >= 1;
// Create the thread pool
g_pipe_compilers = std::make_unique<named_thread_group<pipe_compiler>>("RSX.W", num_worker_threads);
g_num_pipe_compilers = num_worker_threads;
// Initialize the workers. At least one inline compiler shall exist (doesn't actually run)
for (pipe_compiler& compiler : *g_pipe_compilers.get())
{
compiler.initialize(context_create_func, context_bind_func, context_destroy_func);
}
}
// Releases the global worker pool created by initialize_pipe_compiler().
void destroy_pipe_compiler()
{
	g_pipe_compilers = nullptr;
}
// Returns the next worker from the global pool, rotating through the workers on successive calls.
// The pool must have been created with initialize_pipe_compiler() beforehand.
pipe_compiler* get_pipe_compiler()
{
	verify(HERE), g_pipe_compilers;

	// The counter only ever grows. Do the modulo on unsigned values so that a counter which has
	// wrapped into negative territory can never yield a negative offset (and thus a pointer
	// in front of the first worker).
	const auto ticket = static_cast<unsigned int>(g_compiler_index++);
	const auto slot = ticket % static_cast<unsigned int>(g_num_pipe_compilers);

	return g_pipe_compilers.get()->begin() + slot;
}
}

View File

@ -0,0 +1,72 @@
#pragma once
#include "GLHelpers.h"
#include "Emu/RSX/display.h"
#include "Utilities/lockless.h"

#include <functional>
#include <memory>
#include <utility>
namespace gl
{
// A single pipeline compiler worker.
// Programs can be built on the calling thread (COMPILE_INLINE) or queued for the worker,
// which processes its queue in operator()() and reports results through a completion callback.
class pipe_compiler
{
public:
	enum op_flags
	{
		COMPILE_DEFAULT = 0,
		COMPILE_INLINE = 1,   // compile() builds the program before returning
		COMPILE_DEFERRED = 2
	};

	// Hook signature used for the post-create, post-link and completion callbacks
	using callback_t = std::function<void(std::unique_ptr<glsl::program>&)>;

	pipe_compiler();
	~pipe_compiler();

	// Stores the bind/destroy callbacks and creates the worker's context via context_create_func
	void initialize(
		std::function<draw_context_t()> context_create_func,
		std::function<void(draw_context_t)> context_bind_func,
		std::function<void(draw_context_t)> context_destroy_func);

	// Builds a program. Returns the program for COMPILE_INLINE; otherwise queues a job and
	// returns an empty pointer, the result being delivered through completion_callback.
	std::unique_ptr<glsl::program> compile(
		GLuint vp_handle, GLuint fp_handle,
		op_flags flags,
		callback_t post_create_func = {},
		callback_t post_link_func = {},
		callback_t completion_callback = {});

	// Worker thread entry point
	void operator()();

private:
	// One queued compilation request
	struct pipe_compiler_job
	{
		GLuint vp_handle;
		GLuint fp_handle;
		callback_t post_create_func;
		callback_t post_link_func;
		callback_t completion_callback;

		// Callbacks are received by value and moved into the job to avoid a second copy of each functor
		pipe_compiler_job(GLuint vp, GLuint fp, callback_t post_create, callback_t post_link, callback_t completion)
			: vp_handle(vp), fp_handle(fp)
			, post_create_func(std::move(post_create))
			, post_link_func(std::move(post_link))
			, completion_callback(std::move(completion))
		{}
	};

	// Pending jobs, filled by compile() and drained by operator()()
	lf_queue<pipe_compiler_job> m_work_queue;

	// Context created in initialize(); bound on first use by the worker
	draw_context_t m_context = 0;
	atomic_t<bool> m_context_ready = false;

	std::function<void(draw_context_t context)> m_context_bind_func;
	std::function<void(draw_context_t context)> m_context_destroy_func;

	// Creates and links a program, invoking the optional hooks around the link step
	std::unique_ptr<glsl::program> int_compile_graphics_pipe(
		GLuint vp_handle, GLuint fp_handle, callback_t post_create_func, callback_t post_link_func);
};
// Creates the global worker pool. num_worker_threads == -1 selects the count automatically.
void initialize_pipe_compiler(
	std::function<draw_context_t()> context_create_func,
	std::function<void(draw_context_t)> context_bind_func,
	std::function<void(draw_context_t)> context_destroy_func,
	int num_worker_threads = -1);

// Releases the global worker pool.
void destroy_pipe_compiler();

// Returns one worker from the global pool; the pool must have been initialized first.
pipe_compiler* get_pipe_compiler();
}

View File

@ -2,6 +2,7 @@
#include "GLVertexProgram.h" #include "GLVertexProgram.h"
#include "GLFragmentProgram.h" #include "GLFragmentProgram.h"
#include "GLHelpers.h" #include "GLHelpers.h"
#include "GLPipelineCompiler.h"
#include "../Common/ProgramStateCache.h" #include "../Common/ProgramStateCache.h"
#include "../rsx_utils.h" #include "../rsx_utils.h"
@ -9,6 +10,7 @@ struct GLTraits
{ {
using vertex_program_type = GLVertexProgram; using vertex_program_type = GLVertexProgram;
using fragment_program_type = GLFragmentProgram; using fragment_program_type = GLFragmentProgram;
using pipeline_type = gl::glsl::program;
using pipeline_storage_type = std::unique_ptr<gl::glsl::program>; using pipeline_storage_type = std::unique_ptr<gl::glsl::program>;
using pipeline_properties = void*; using pipeline_properties = void*;
@ -32,17 +34,49 @@ struct GLTraits
} }
static static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties&) pipeline_type* build_pipeline(
const vertex_program_type &vertexProgramData,
const fragment_program_type &fragmentProgramData,
const pipeline_properties&,
bool compile_async,
std::function<pipeline_type*(pipeline_storage_type&)> callback)
{ {
pipeline_storage_type result = std::make_unique<gl::glsl::program>(); auto compiler = gl::get_pipe_compiler();
result->create() auto flags = (compile_async) ? gl::pipe_compiler::COMPILE_DEFERRED : gl::pipe_compiler::COMPILE_INLINE;
.attach(gl::glsl::shader_view(vertexProgramData.id))
.attach(gl::glsl::shader_view(fragmentProgramData.id)) gl::fence vp_fence, fp_fence;
if (compile_async)
{
vp_fence = vertexProgramData.shader.get_compile_fence_sync();
fp_fence = fragmentProgramData.shader.get_compile_fence_sync();
}
auto post_create_func = [vp_id = vertexProgramData.id, fp_id = fragmentProgramData.id, vp_fence, fp_fence]
(std::unique_ptr<gl::glsl::program>& program)
{
if (!vp_fence.is_empty())
{
// Force server threads to wait for the compilation to finish
vp_fence.server_wait_sync();
fp_fence.server_wait_sync();
}
program->attach(gl::glsl::shader_view(vp_id))
.attach(gl::glsl::shader_view(fp_id))
.bind_fragment_data_location("ocol0", 0) .bind_fragment_data_location("ocol0", 0)
.bind_fragment_data_location("ocol1", 1) .bind_fragment_data_location("ocol1", 1)
.bind_fragment_data_location("ocol2", 2) .bind_fragment_data_location("ocol2", 2)
.bind_fragment_data_location("ocol3", 3) .bind_fragment_data_location("ocol3", 3);
.link([](gl::glsl::program* program)
if (g_cfg.video.log_programs)
{
rsx_log.notice("*** prog id = %d", program->id());
rsx_log.notice("*** vp id = %d", vp_id);
rsx_log.notice("*** fp id = %d", fp_id);
}
};
auto post_link_func = [](std::unique_ptr<gl::glsl::program>& program)
{ {
// Program locations are guaranteed to not change after linking // Program locations are guaranteed to not change after linking
// Texture locations are simply bound to the TIUs so this can be done once // Texture locations are simply bound to the TIUs so this can be done once
@ -74,18 +108,12 @@ struct GLTraits
// Bind locations 0 and 1 to the stream buffers // Bind locations 0 and 1 to the stream buffers
program->uniforms[0] = GL_STREAM_BUFFER_START + 0; program->uniforms[0] = GL_STREAM_BUFFER_START + 0;
program->uniforms[1] = GL_STREAM_BUFFER_START + 1; program->uniforms[1] = GL_STREAM_BUFFER_START + 1;
}); };
if (g_cfg.video.log_programs) auto pipeline = compiler->compile(vertexProgramData.id, fragmentProgramData.id,
{ flags, post_create_func, post_link_func, callback);
rsx_log.notice("*** prog id = %d", result->id());
rsx_log.notice("*** vp id = %d", vertexProgramData.id);
rsx_log.notice("*** fp id = %d", fragmentProgramData.id);
rsx_log.notice("*** vp shader = \n%s", vertexProgramData.shader.get_source().c_str());
rsx_log.notice("*** fp shader = \n%s", fragmentProgramData.shader.get_source().c_str());
}
return result; return callback(pipeline);
} }
}; };

View File

@ -228,23 +228,61 @@ static const std::string rsx_fp_op_names[] =
struct RSXFragmentProgram struct RSXFragmentProgram
{ {
void *addr; struct data_storage_helper
u32 offset; {
u32 ucode_length; void* data_ptr = nullptr;
u32 total_length; std::vector<char> local_storage;
u32 ctrl;
u16 unnormalized_coords; data_storage_helper() = default;
u16 redirected_textures;
u16 shadow_textures; data_storage_helper(void* ptr)
bool two_sided_lighting; {
u32 texture_dimensions; data_ptr = ptr;
u32 texcoord_control_mask; local_storage.clear();
}
data_storage_helper(const data_storage_helper& other)
{
if (other.data_ptr == other.local_storage.data())
{
local_storage = other.local_storage;
data_ptr = local_storage.data();
}
else
{
data_ptr = other.data_ptr;
local_storage.clear();
}
}
void deep_copy(u32 max_length)
{
if (local_storage.empty() && data_ptr)
{
local_storage.resize(max_length);
std::memcpy(local_storage.data(), data_ptr, max_length);
data_ptr = local_storage.data();
}
}
} mutable data;
u32 offset = 0;
u32 ucode_length = 0;
u32 total_length = 0;
u32 ctrl = 0;
u16 unnormalized_coords = 0;
u16 redirected_textures = 0;
u16 shadow_textures = 0;
bool two_sided_lighting = false;
u32 texture_dimensions = 0;
u32 texcoord_control_mask = 0;
float texture_scale[16][4]; float texture_scale[16][4];
u8 textures_alpha_kill[16]; u8 textures_alpha_kill[16];
u8 textures_zfunc[16]; u8 textures_zfunc[16];
bool valid; bool valid = false;
rsx::texture_dimension_extended get_texture_dimension(u8 id) const rsx::texture_dimension_extended get_texture_dimension(u8 id) const
{ {
@ -264,6 +302,26 @@ struct RSXFragmentProgram
RSXFragmentProgram() RSXFragmentProgram()
{ {
memset(this, 0, sizeof(RSXFragmentProgram)); std::memset(texture_scale, 0, sizeof(float) * 16 * 4);
std::memset(textures_alpha_kill, 0, sizeof(u8) * 16);
std::memset(textures_zfunc, 0, sizeof(u8) * 16);
}
static RSXFragmentProgram clone(const RSXFragmentProgram& prog)
{
auto result = prog;
result.clone_data();
return result;
}
void* get_data() const
{
return data.data_ptr;
}
void clone_data() const
{
verify(HERE), ucode_length;
data.deep_copy(ucode_length);
} }
}; };

View File

@ -561,41 +561,6 @@ namespace rsx
} }
}); });
g_fxo->init<named_thread>("RSX Decompiler Thread", [this]
{
const auto shadermode = g_cfg.video.shadermode.get();
if (shadermode != shader_mode::async_recompiler && shadermode != shader_mode::async_with_interpreter)
{
// Die
return;
}
on_decompiler_init();
if (g_cfg.core.thread_scheduler_enabled)
{
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
}
while (!Emu.IsStopped() && !m_rsx_thread_exiting)
{
if (!on_decompiler_task())
{
if (Emu.IsPaused())
{
std::this_thread::sleep_for(1ms);
}
else
{
std::this_thread::sleep_for(500us);
}
}
}
on_decompiler_exit();
});
// Raise priority above other threads // Raise priority above other threads
thread_ctrl::set_native_priority(1); thread_ctrl::set_native_priority(1);
@ -1779,10 +1744,10 @@ namespace rsx
const auto [program_offset, program_location] = method_registers.shader_program_address(); const auto [program_offset, program_location] = method_registers.shader_program_address();
result.addr = vm::base(rsx::get_address(program_offset, program_location, HERE)); result.data = vm::base(rsx::get_address(program_offset, program_location, HERE));
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.get_data());
result.addr = (static_cast<u8*>(result.addr) + current_fp_metadata.program_start_offset); result.data = (static_cast<u8*>(result.get_data()) + current_fp_metadata.program_start_offset);
result.offset = program_offset + current_fp_metadata.program_start_offset; result.offset = program_offset + current_fp_metadata.program_start_offset;
result.ucode_length = current_fp_metadata.program_ucode_length; result.ucode_length = current_fp_metadata.program_ucode_length;
result.total_length = result.ucode_length + current_fp_metadata.program_start_offset; result.total_length = result.ucode_length + current_fp_metadata.program_start_offset;

View File

@ -826,10 +826,6 @@ namespace rsx
*/ */
virtual void do_local_task(FIFO_state state); virtual void do_local_task(FIFO_state state);
virtual void on_decompiler_init() {}
virtual void on_decompiler_exit() {}
virtual bool on_decompiler_task() { return false; }
virtual void emit_geometry(u32) {} virtual void emit_geometry(u32) {}
void run_FIFO(); void run_FIFO();

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKPipelineCompiler.h"
#include "VKRenderPass.h" #include "VKRenderPass.h"
#include "Utilities/StrUtil.h" #include "Utilities/StrUtil.h"
#include "Emu/IdManager.h" #include "Emu/IdManager.h"
@ -177,10 +178,8 @@ namespace vk
info.basePipelineIndex = -1; info.basePipelineIndex = -1;
info.basePipelineHandle = VK_NULL_HANDLE; info.basePipelineHandle = VK_NULL_HANDLE;
VkPipeline pipeline; auto compiler = vk::get_pipe_compiler();
vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &info, nullptr, &pipeline); m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE);
m_program = std::make_unique<vk::glsl::program>(*get_current_renderer(), pipeline, m_pipeline_layout);
declare_inputs(); declare_inputs();
} }

View File

@ -473,7 +473,10 @@ VKGSRender::VKGSRender() : GSRender()
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32); null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);
int thread_count = g_cfg.video.shader_compiler_threads_count;
if (!thread_count) thread_count = -1;
vk::initialize_compiler_context(); vk::initialize_compiler_context();
vk::initialize_pipe_compiler(thread_count);
if (g_cfg.video.overlay) if (g_cfg.video.overlay)
{ {
@ -482,7 +485,7 @@ VKGSRender::VKGSRender() : GSRender()
m_text_writer->init(*m_device, vk::get_renderpass(*m_device, key)); m_text_writer->init(*m_device, vk::get_renderpass(*m_device, key));
} }
m_prog_buffer = std::make_unique<VKProgramBuffer> m_prog_buffer = std::make_unique<vk::program_cache>
( (
[this](const vk::pipeline_props& props, const RSXVertexProgram& vp, const RSXFragmentProgram& fp) [this](const vk::pipeline_props& props, const RSXVertexProgram& vp, const RSXFragmentProgram& fp)
{ {
@ -561,8 +564,9 @@ VKGSRender::~VKGSRender()
m_texture_cache.destroy(); m_texture_cache.destroy();
//Shaders //Shaders
vk::finalize_compiler_context(); vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled
m_prog_buffer->clear(); vk::finalize_compiler_context(); // Shut down the glslang compiler
m_prog_buffer->clear(); // Delete shader objects
m_shader_interpreter.destroy(); m_shader_interpreter.destroy();
m_persistent_attribute_storage.reset(); m_persistent_attribute_storage.reset();
@ -1629,7 +1633,7 @@ bool VKGSRender::load_program()
vertex_program.skip_vertex_input_check = true; vertex_program.skip_vertex_input_check = true;
fragment_program.unnormalized_coords = 0; fragment_program.unnormalized_coords = 0;
m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
shadermode != shader_mode::recompiler, true, *m_device, pipeline_layout).get(); shadermode != shader_mode::recompiler, true, *m_device, pipeline_layout);
vk::leave_uninterruptible(); vk::leave_uninterruptible();
@ -1815,8 +1819,7 @@ void VKGSRender::load_program_env()
control_masks[0] = rsx::method_registers.shader_control(); control_masks[0] = rsx::method_registers.shader_control();
control_masks[1] = current_fragment_program.texture_dimensions; control_masks[1] = current_fragment_program.texture_dimensions;
const auto fp_data = static_cast<u8*>(current_fragment_program.addr) + current_fp_metadata.program_start_offset; std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length);
std::memcpy(fp_buf + 16, fp_data, current_fp_metadata.program_ucode_length);
m_fragment_instructions_buffer.unmap(); m_fragment_instructions_buffer.unmap();
m_fragment_instructions_buffer_info = { m_fragment_instructions_buffer.heap->value, fp_mapping, fp_block_length }; m_fragment_instructions_buffer_info = { m_fragment_instructions_buffer.heap->value, fp_mapping, fp_block_length };
@ -2499,8 +2502,3 @@ void VKGSRender::end_conditional_rendering()
{ {
thread::end_conditional_rendering(); thread::end_conditional_rendering();
} }
bool VKGSRender::on_decompiler_task()
{
return m_prog_buffer->async_update(8, *m_device, pipeline_layout).first;
}

View File

@ -22,7 +22,7 @@ namespace vk
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>; using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
using null_vertex_cache = vertex_cache; using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>; using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>;
struct vertex_upload_info struct vertex_upload_info
{ {
@ -390,7 +390,7 @@ public:
std::unique_ptr<vk::shader_cache> m_shaders_cache; std::unique_ptr<vk::shader_cache> m_shaders_cache;
private: private:
std::unique_ptr<VKProgramBuffer> m_prog_buffer; std::unique_ptr<vk::program_cache> m_prog_buffer;
std::unique_ptr<vk::swapchain_base> m_swapchain; std::unique_ptr<vk::swapchain_base> m_swapchain;
vk::context m_thread_context; vk::context m_thread_context;
@ -573,6 +573,4 @@ protected:
bool on_access_violation(u32 address, bool is_writing) override; bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override; void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
void on_semaphore_acquire_wait() override; void on_semaphore_acquire_wait() override;
bool on_decompiler_task() override;
}; };

View File

@ -6,6 +6,7 @@
#include "VKFramebuffer.h" #include "VKFramebuffer.h"
#include "VKResourceManager.h" #include "VKResourceManager.h"
#include "VKRenderPass.h" #include "VKRenderPass.h"
#include "VKPipelineCompiler.h"
#include "../Overlays/overlays.h" #include "../Overlays/overlays.h"
@ -233,7 +234,6 @@ namespace vk
vp.scissorCount = 1; vp.scissorCount = 1;
vp.viewportCount = 1; vp.viewportCount = 1;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {}; VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi; info.pVertexInputState = &vi;
@ -251,9 +251,8 @@ namespace vk
info.basePipelineHandle = VK_NULL_HANDLE; info.basePipelineHandle = VK_NULL_HANDLE;
info.renderPass = render_pass; info.renderPass = render_pass;
CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, nullptr, 1, &info, NULL, &pipeline)); auto compiler = vk::get_pipe_compiler();
auto program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs());
auto program = std::make_unique<vk::glsl::program>(*m_device, pipeline, m_pipeline_layout, get_vertex_inputs(), get_fragment_inputs());
auto result = program.get(); auto result = program.get();
m_program_cache[storage_key] = std::move(program); m_program_cache[storage_key] = std::move(program);

View File

@ -0,0 +1,231 @@
#include "stdafx.h"
#include "VKPipelineCompiler.h"
#include "VKRenderPass.h"
#include "Utilities/Thread.h"
#include <thread>
namespace vk
{
// Global list of worker threads
std::unique_ptr<named_thread_group<pipe_compiler>> g_pipe_compilers;
int g_num_pipe_compilers = 0;
atomic_t<int> g_compiler_index{};
// Constructs a compiler with no device bound; the device is supplied later through initialize().
pipe_compiler::pipe_compiler()
{
// TODO: Initialize workqueue
}
// Destructor. Performs no explicit cleanup at the moment (see the TODO below).
pipe_compiler::~pipe_compiler()
{
// TODO: Destroy and do cleanup
}
// Binds the render device used by the int_compile_* helpers.
// The pointer is stored as-is and is not owned by the compiler.
// NOTE(review): presumably the device must outlive this compiler - confirm against the renderer teardown order.
void pipe_compiler::initialize(const vk::render_device* pdev)
{
m_device = pdev;
}
// Worker entry point. Repeats until the thread state becomes 'aborting':
// takes every queued job via pop_all(), compiles it, hands the result to the job's callback,
// then calls wait() on the queue before the next pass.
// NOTE(review): callback_func is invoked unconditionally; callback_t is a std::function whose
// default argument is empty, so deferred callers presumably always pass a callback - confirm.
void pipe_compiler::operator()()
{
while (thread_ctrl::state() != thread_state::aborting)
{
for (auto&& job : m_work_queue.pop_all())
{
if (job.is_graphics_job)
{
// The job constructor concatenated the vertex and fragment inputs into job.inputs,
// so the combined list goes in as vs_inputs and fs_inputs is passed empty.
auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.pipe_layout, job.inputs, {});
job.callback_func(compiled);
}
else
{
auto compiled = int_compile_compute_pipe(job.compute_data, job.pipe_layout);
job.callback_func(compiled);
}
}
m_work_queue.wait();
}
}
std::unique_ptr<glsl::program> pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout)
{
VkPipeline pipeline;
vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &create_info, nullptr, &pipeline);
return std::make_unique<vk::glsl::program>(*m_device, pipeline, pipe_layout);
}
// Creates a graphics pipeline synchronously from a ready-made create-info structure,
// wraps it in a program object together with its input declarations and links the program.
std::unique_ptr<glsl::program> pipe_compiler::int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout,
	const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs)
{
	VkPipeline handle;
	CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, nullptr, 1, &create_info, nullptr, &handle));

	auto program = std::make_unique<vk::glsl::program>(*m_device, handle, pipe_layout, vs_inputs, fs_inputs);
	program->link();
	return program;
}
// Builds a complete VkGraphicsPipelineCreateInfo from the cached pipeline properties and the
// two shader modules (index 0 = vertex, index 1 = fragment), then forwards to the create-info overload.
// All pointer members are pointed at locals or at members of create_info right here, so the
// properties structure may have been copied or moved before this call.
std::unique_ptr<glsl::program> pipe_compiler::int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout,
	const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs)
{
	// Shader stages share everything except the stage bit and the module handle
	const VkShaderStageFlagBits stage_bits[2] = { VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT };
	VkPipelineShaderStageCreateInfo shader_stages[2] = {};
	for (int i = 0; i < 2; ++i)
	{
		shader_stages[i].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shader_stages[i].stage = stage_bits[i];
		shader_stages[i].module = modules[i];
		shader_stages[i].pName = "main";
	}

	// State that is supplied at draw time rather than baked into the pipeline
	std::vector<VkDynamicState> dynamic_state_descriptors =
	{
		VK_DYNAMIC_STATE_VIEWPORT,
		VK_DYNAMIC_STATE_SCISSOR,
		VK_DYNAMIC_STATE_LINE_WIDTH,
		VK_DYNAMIC_STATE_BLEND_CONSTANTS,
		VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
		VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
		VK_DYNAMIC_STATE_STENCIL_REFERENCE,
		VK_DYNAMIC_STATE_DEPTH_BIAS
	};

	if (vk::get_current_renderer()->get_depth_bounds_support())
	{
		dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
	}

	VkPipelineDynamicStateCreateInfo dynamic_state_info = {};
	dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
	dynamic_state_info.dynamicStateCount = ::size32(dynamic_state_descriptors);
	dynamic_state_info.pDynamicStates = dynamic_state_descriptors.data();

	VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };

	VkPipelineViewportStateCreateInfo vp = {};
	vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	vp.scissorCount = 1;
	vp.viewportCount = 1;

	// The sample count is also encoded in the renderpass key; both must agree
	VkPipelineMultisampleStateCreateInfo ms = create_info.state.ms;
	verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((create_info.renderpass_key >> 16) & 0xF);
	if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
	{
		// Point the sample mask at the storage held inside the properties structure
		ms.pSampleMask = &create_info.state.temp_storage.msaa_sample_mask;
	}

	// Same for the blend attachments: refer to the array inside the properties structure
	VkPipelineColorBlendStateCreateInfo cs = create_info.state.cs;
	cs.pAttachments = create_info.state.att_state;

	VkGraphicsPipelineCreateInfo info = {};
	info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	info.stageCount = 2;
	info.pStages = shader_stages;
	info.pVertexInputState = &vi;
	info.pInputAssemblyState = &create_info.state.ia;
	info.pViewportState = &vp;
	info.pRasterizationState = &create_info.state.rs;
	info.pMultisampleState = &ms;
	info.pDepthStencilState = &create_info.state.ds;
	info.pColorBlendState = &cs;
	info.pDynamicState = &dynamic_state_info;
	info.layout = pipe_layout;
	info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key);
	info.basePipelineHandle = VK_NULL_HANDLE;
	info.basePipelineIndex = -1;

	return int_compile_graphics_pipe(info, pipe_layout, vs_inputs, fs_inputs);
}
// Compute pipeline entry point.
// With COMPILE_INLINE the pipeline is built on the calling thread and returned.
// With any other flag the request is pushed to the work queue, an empty pointer is returned
// and the result is delivered later through 'callback'.
std::unique_ptr<glsl::program> pipe_compiler::compile(
	const VkComputePipelineCreateInfo& create_info,
	VkPipelineLayout pipe_layout,
	op_flags flags, callback_t callback)
{
	if (flags != COMPILE_INLINE)
	{
		m_work_queue.push(create_info, pipe_layout, callback);
		return {};
	}

	return int_compile_compute_pipe(create_info, pipe_layout);
}
// Graphics pipeline entry point taking a caller-built VkGraphicsPipelineCreateInfo.
// Only COMPILE_INLINE is accepted (enforced by the verify below); the callback argument is unused.
// Returns the compiled and linked program.
std::unique_ptr<glsl::program> pipe_compiler::compile(
const VkGraphicsPipelineCreateInfo& create_info,
VkPipelineLayout pipe_layout,
op_flags flags, callback_t /*callback*/,
const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs)
{
// It is very inefficient to defer this as all pointers need to be saved
verify(HERE), flags == COMPILE_INLINE;
return int_compile_graphics_pipe(create_info, pipe_layout, vs_inputs, fs_inputs);
}
// Graphics pipeline entry point taking cached pipeline properties plus the two shader modules.
// With COMPILE_INLINE the pipeline is built on the calling thread and returned.
// With any other flag the request is pushed to the work queue, an empty pointer is returned
// and the result is delivered later through 'callback'.
std::unique_ptr<glsl::program> pipe_compiler::compile(
	const vk::pipeline_props& create_info,
	VkShaderModule module_handles[2],
	VkPipelineLayout pipe_layout,
	op_flags flags, callback_t callback,
	const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs)
{
	if (flags != COMPILE_INLINE)
	{
		m_work_queue.push(create_info, pipe_layout, module_handles, vs_inputs, fs_inputs, callback);
		return {};
	}

	return int_compile_graphics_pipe(create_info, module_handles, pipe_layout, vs_inputs, fs_inputs);
}
void initialize_pipe_compiler(int num_worker_threads)
{
if (num_worker_threads == -1)
{
// Select optimal number of compiler threads
const auto hw_threads = std::thread::hardware_concurrency();
if (hw_threads >= 12)
{
num_worker_threads = 4;
}
else if (hw_threads >= 8)
{
num_worker_threads = 2;
}
else
{
num_worker_threads = 1;
}
}
verify(HERE), num_worker_threads >= 1;
const vk::render_device* dev = vk::get_current_renderer();
verify("Cannot initialize pipe compiler before creating a logical device" HERE), dev;
// Create the thread pool
g_pipe_compilers = std::make_unique<named_thread_group<pipe_compiler>>("RSX.W", num_worker_threads);
g_num_pipe_compilers = num_worker_threads;
// Initialize the workers. At least one inline compiler shall exist (doesn't actually run)
for (pipe_compiler& compiler : *g_pipe_compilers.get())
{
compiler.initialize(dev);
}
}
// Releases the global compiler group by resetting the owning pointer.
// NOTE(review): presumably this stops and joins the worker threads via the group's destructor - confirm in named_thread_group.
void destroy_pipe_compiler()
{
g_pipe_compilers.reset();
}
// Returns the next compiler in round-robin order.
// The global group must have been created with initialize_pipe_compiler() beforehand.
pipe_compiler* get_pipe_compiler()
{
	verify(HERE), g_pipe_compilers;

	// Do the modulo in unsigned arithmetic: with a signed ticket, a counter value that has
	// gone negative would produce a negative remainder and an address before begin().
	const auto ticket = static_cast<unsigned int>(g_compiler_index++);
	const auto slot = ticket % static_cast<unsigned int>(g_num_pipe_compilers);

	return g_pipe_compilers.get()->begin() + slot;
}
}

View File

@ -0,0 +1,183 @@
#pragma once
#include "VKHelpers.h"
#include "../rsx_utils.h"
#include "Utilities/hash.h"
#include "Utilities/lockless.h"
namespace vk
{
// Key describing a graphics pipeline: the fixed-function state plus the renderpass key.
// Used together with the hash_struct specialization in this file for pipeline cache lookups.
struct pipeline_props
{
graphics_pipeline_state state;
u64 renderpass_key;
// Field-wise equality. Only the first colour attachment state (att_state[0]) is compared.
bool operator==(const pipeline_props& other) const
{
if (renderpass_key != other.renderpass_key)
return false;
if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
// Cannot memcmp cs due to pAttachments being a pointer to memory
if (state.cs.logicOp != other.state.cs.logicOp ||
state.cs.logicOpEnable != other.state.cs.logicOpEnable ||
memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32)))
return false;
if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
// Multisample state and sample mask are only compared when this object is multisampled.
// NOTE(review): only this->state.ms is inspected here; presumably a sample-count mismatch is
// already caught by the renderpass_key comparison above - confirm.
if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
{
if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo)))
return false;
if (state.temp_storage.msaa_sample_mask != other.state.temp_storage.msaa_sample_mask)
return false;
}
return true;
}
};
// Pipeline compiler worker.
// Each instance can build pipelines inline on the calling thread (COMPILE_INLINE) or accept
// jobs on its work queue which are then processed by operator()().
class pipe_compiler
{
public:
	// Compilation mode requested by the caller
	enum op_flags
	{
		COMPILE_DEFAULT = 0,
		COMPILE_INLINE = 1,
		COMPILE_DEFERRED = 2
	};

	// Invoked with the finished program when a queued job completes
	using callback_t = std::function<void(std::unique_ptr<glsl::program>&)>;

	pipe_compiler();
	~pipe_compiler();

	// Binds the render device used for pipeline creation
	void initialize(const vk::render_device* pdev);

	// Compute pipeline. Returns the program for inline requests, an empty pointer for queued ones.
	std::unique_ptr<glsl::program> compile(
		const VkComputePipelineCreateInfo& create_info,
		VkPipelineLayout pipe_layout,
		op_flags flags, callback_t callback = {});

	// Graphics pipeline from a caller-built create-info. Inline only.
	std::unique_ptr<glsl::program> compile(
		const VkGraphicsPipelineCreateInfo& create_info,
		VkPipelineLayout pipe_layout,
		op_flags flags, callback_t callback = {},
		const std::vector<glsl::program_input>& vs_inputs = {},
		const std::vector<glsl::program_input>& fs_inputs = {});

	// Graphics pipeline from cached properties and two shader modules (vertex, fragment).
	// Returns the program for inline requests, an empty pointer for queued ones.
	std::unique_ptr<glsl::program> compile(
		const vk::pipeline_props &create_info,
		VkShaderModule module_handles[2],
		VkPipelineLayout pipe_layout,
		op_flags flags, callback_t callback = {},
		const std::vector<glsl::program_input>& vs_inputs = {},
		const std::vector<glsl::program_input>& fs_inputs = {});

	// Worker thread entry point
	void operator()();

private:
	// Self-contained copy of a compute pipeline description.
	// stage.pName is a raw pointer, so the entry point name is duplicated into entry_name and
	// pName is pointed at that copy. The pointer must be re-established whenever the object is
	// copied or assigned; otherwise it keeps referring to the source object's string, which
	// dangles once the source (typically a temporary) is destroyed.
	class compute_pipeline_props : public VkComputePipelineCreateInfo
	{
		// Storage for the entry name
		std::string entry_name;

		// Points stage.pName at this object's own string (a null name stays null)
		void rebase_entry_name()
		{
			if (stage.pName)
			{
				stage.pName = entry_name.c_str();
			}
		}

	public:
		compute_pipeline_props()
			: VkComputePipelineCreateInfo{}
		{
		}

		compute_pipeline_props(const VkComputePipelineCreateInfo& info)
			: VkComputePipelineCreateInfo(info)
		{
			if (info.stage.pName)
			{
				entry_name = info.stage.pName;
			}

			rebase_entry_name();
		}

		compute_pipeline_props(const compute_pipeline_props& other)
			: VkComputePipelineCreateInfo(other)
			, entry_name(other.entry_name)
		{
			rebase_entry_name();
		}

		// Declaring the copy operations suppresses the implicit move operations,
		// so moves also go through these and rebase correctly.
		compute_pipeline_props& operator=(const compute_pipeline_props& other)
		{
			if (this != &other)
			{
				static_cast<VkComputePipelineCreateInfo&>(*this) = other;
				entry_name = other.entry_name;
				rebase_entry_name();
			}

			return *this;
		}
	};

	// One queued compilation request. Either the graphics or the compute fields are meaningful,
	// selected by is_graphics_job.
	struct pipe_compiler_job
	{
		bool is_graphics_job = false;
		callback_t callback_func;

		vk::pipeline_props graphics_data;
		compute_pipeline_props compute_data;
		VkPipelineLayout pipe_layout;
		VkShaderModule graphics_modules[2] = {};
		// Vertex inputs followed by fragment inputs
		std::vector<glsl::program_input> inputs;

		// Graphics job
		pipe_compiler_job(
			const vk::pipeline_props& props,
			VkPipelineLayout layout,
			VkShaderModule modules[2],
			const std::vector<glsl::program_input>& vs_in,
			const std::vector<glsl::program_input>& fs_in,
			callback_t func)
		{
			callback_func = func;
			graphics_data = props;
			pipe_layout = layout;
			graphics_modules[0] = modules[0];
			graphics_modules[1] = modules[1];
			is_graphics_job = true;

			inputs.reserve(vs_in.size() + fs_in.size());
			inputs.insert(inputs.end(), vs_in.begin(), vs_in.end());
			inputs.insert(inputs.end(), fs_in.begin(), fs_in.end());
		}

		// Compute job
		pipe_compiler_job(
			const VkComputePipelineCreateInfo& props,
			VkPipelineLayout layout,
			callback_t func)
		{
			callback_func = func;
			compute_data = props;
			pipe_layout = layout;
			is_graphics_job = false;
		}
	};

	const vk::render_device* m_device = nullptr;
	lf_queue<pipe_compiler_job> m_work_queue;

	std::unique_ptr<glsl::program> int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout);
	std::unique_ptr<glsl::program> int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout,
		const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs);
	std::unique_ptr<glsl::program> int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout,
		const std::vector<glsl::program_input>& vs_inputs, const std::vector<glsl::program_input>& fs_inputs);
};
void initialize_pipe_compiler(int num_worker_threads = -1);
void destroy_pipe_compiler();
pipe_compiler* get_pipe_compiler();
}
namespace rpcs3
{
// Hash for vk::pipeline_props, combining the renderpass key with the input assembly,
// depth-stencil, rasterization and multisample state, the MSAA sample mask and the first
// colour attachment state.
template <>
size_t hash_struct<vk::pipeline_props>(const vk::pipeline_props &pipelineProperties)
{
	size_t seed = hash_base(pipelineProperties.renderpass_key);
	seed ^= hash_struct(pipelineProperties.state.ia);
	seed ^= hash_struct(pipelineProperties.state.ds);
	seed ^= hash_struct(pipelineProperties.state.rs);
	seed ^= hash_struct(pipelineProperties.state.ms);
	seed ^= hash_base(pipelineProperties.state.temp_storage.msaa_sample_mask);

	// state.cs is not hashed: it holds the pAttachments pointer, and pointers to memory must
	// not influence the hash. The attachment state it refers to is hashed directly instead.
	seed ^= hash_struct(pipelineProperties.state.att_state[0]);
	return hash_base(seed);
}
}

View File

@ -5,82 +5,20 @@
#include "Utilities/hash.h" #include "Utilities/hash.h"
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKRenderPass.h" #include "VKRenderPass.h"
#include "VKPipelineCompiler.h"
namespace vk namespace vk
{ {
struct pipeline_props struct VKTraits
{ {
graphics_pipeline_state state;
u64 renderpass_key;
bool operator==(const pipeline_props& other) const
{
if (renderpass_key != other.renderpass_key)
return false;
if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
// Cannot memcmp cs due to pAttachments being a pointer to memory
if (state.cs.logicOp != other.state.cs.logicOp ||
state.cs.logicOpEnable != other.state.cs.logicOpEnable ||
memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32)))
return false;
if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
{
if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo)))
return false;
if (state.temp_storage.msaa_sample_mask != other.state.temp_storage.msaa_sample_mask)
return false;
}
return true;
}
};
}
namespace rpcs3
{
template <>
size_t hash_struct<vk::pipeline_props>(const vk::pipeline_props &pipelineProperties)
{
size_t seed = hash_base(pipelineProperties.renderpass_key);
seed ^= hash_struct(pipelineProperties.state.ia);
seed ^= hash_struct(pipelineProperties.state.ds);
seed ^= hash_struct(pipelineProperties.state.rs);
seed ^= hash_struct(pipelineProperties.state.ms);
seed ^= hash_base(pipelineProperties.state.temp_storage.msaa_sample_mask);
// Do not compare pointers to memory!
VkPipelineColorBlendStateCreateInfo tmp;
memcpy(&tmp, &pipelineProperties.state.cs, sizeof(VkPipelineColorBlendStateCreateInfo));
tmp.pAttachments = nullptr;
seed ^= hash_struct(pipelineProperties.state.att_state[0]);
return hash_base(seed);
}
}
struct VKTraits
{
using vertex_program_type = VKVertexProgram; using vertex_program_type = VKVertexProgram;
using fragment_program_type = VKFragmentProgram; using fragment_program_type = VKFragmentProgram;
using pipeline_type = vk::glsl::program;
using pipeline_storage_type = std::unique_ptr<vk::glsl::program>; using pipeline_storage_type = std::unique_ptr<vk::glsl::program>;
using pipeline_properties = vk::pipeline_props; using pipeline_properties = vk::pipeline_props;
static static
void recompile_fragment_program(const RSXFragmentProgram &RSXFP, fragment_program_type& fragmentProgramData, size_t ID) void recompile_fragment_program(const RSXFragmentProgram& RSXFP, fragment_program_type& fragmentProgramData, size_t ID)
{ {
fragmentProgramData.Decompile(RSXFP); fragmentProgramData.Decompile(RSXFP);
fragmentProgramData.id = static_cast<u32>(ID); fragmentProgramData.id = static_cast<u32>(ID);
@ -88,7 +26,7 @@ struct VKTraits
} }
static static
void recompile_vertex_program(const RSXVertexProgram &RSXVP, vertex_program_type& vertexProgramData, size_t ID) void recompile_vertex_program(const RSXVertexProgram& RSXVP, vertex_program_type& vertexProgramData, size_t ID)
{ {
vertexProgramData.Decompile(RSXVP); vertexProgramData.Decompile(RSXVP);
vertexProgramData.id = static_cast<u32>(ID); vertexProgramData.id = static_cast<u32>(ID);
@ -96,7 +34,7 @@ struct VKTraits
} }
static static
void validate_pipeline_properties(const VKVertexProgram&, const VKFragmentProgram &fp, vk::pipeline_props& properties) void validate_pipeline_properties(const VKVertexProgram&, const VKFragmentProgram& fp, vk::pipeline_props& properties)
{ {
//Explicitly disable writing to undefined registers //Explicitly disable writing to undefined registers
properties.state.att_state[0].colorWriteMask &= fp.output_color_masks[0]; properties.state.att_state[0].colorWriteMask &= fp.output_color_masks[0];
@ -106,115 +44,58 @@ struct VKTraits
} }
static static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, pipeline_type* build_pipeline(
const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) const vertex_program_type& vertexProgramData,
const fragment_program_type& fragmentProgramData,
const vk::pipeline_props& pipelineProperties,
bool compile_async,
std::function<pipeline_type*(pipeline_storage_type&)> callback,
VkDevice dev, VkPipelineLayout common_pipeline_layout)
{ {
VkPipelineShaderStageCreateInfo shader_stages[2] = {}; const auto compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE;
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; VkShaderModule modules[2] = { vertexProgramData.handle, fragmentProgramData.handle };
shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
shader_stages[0].module = vertexProgramData.handle;
shader_stages[0].pName = "main";
shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; auto compiler = vk::get_pipe_compiler();
shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; auto result = compiler->compile(
shader_stages[1].module = fragmentProgramData.handle; pipelineProperties, modules, common_pipeline_layout,
shader_stages[1].pName = "main"; compiler_flags, callback,
vertexProgramData.uniforms,
fragmentProgramData.uniforms);
std::vector<VkDynamicState> dynamic_state_descriptors; return callback(result);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_VIEWPORT); }
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_SCISSOR); };
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_LINE_WIDTH);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
if (vk::get_current_renderer()->get_depth_bounds_support()) struct program_cache : public program_state_cache<VKTraits>
{ {
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); program_cache(decompiler_callback_t callback)
}
VkPipelineDynamicStateCreateInfo dynamic_state_info = {};
dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_info.pDynamicStates = dynamic_state_descriptors.data();
dynamic_state_info.dynamicStateCount = ::size32(dynamic_state_descriptors);
VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
VkPipelineViewportStateCreateInfo vp = {};
vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
vp.viewportCount = 1;
vp.scissorCount = 1;
VkPipelineMultisampleStateCreateInfo ms = pipelineProperties.state.ms;
verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((pipelineProperties.renderpass_key >> 16) & 0xF);
if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
{
// Update the sample mask pointer
ms.pSampleMask = &pipelineProperties.state.temp_storage.msaa_sample_mask;
}
// Rebase pointers from pipeline structure in case it is moved/copied
VkPipelineColorBlendStateCreateInfo cs = pipelineProperties.state.cs;
cs.pAttachments = pipelineProperties.state.att_state;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi;
info.pInputAssemblyState = &pipelineProperties.state.ia;
info.pRasterizationState = &pipelineProperties.state.rs;
info.pColorBlendState = &cs;
info.pMultisampleState = &ms;
info.pViewportState = &vp;
info.pDepthStencilState = &pipelineProperties.state.ds;
info.stageCount = 2;
info.pStages = shader_stages;
info.pDynamicState = &dynamic_state_info;
info.layout = common_pipeline_layout;
info.basePipelineIndex = -1;
info.basePipelineHandle = VK_NULL_HANDLE;
info.renderPass = vk::get_renderpass(dev, pipelineProperties.renderpass_key);
CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline));
pipeline_storage_type result = std::make_unique<vk::glsl::program>(dev, pipeline, common_pipeline_layout, vertexProgramData.uniforms, fragmentProgramData.uniforms);
result->link();
return result;
}
};
struct VKProgramBuffer : public program_state_cache<VKTraits>
{
VKProgramBuffer(decompiler_callback_t callback)
{ {
notify_pipeline_compiled = callback; notify_pipeline_compiled = callback;
} }
u64 get_hash(const vk::pipeline_props &props) u64 get_hash(const vk::pipeline_props& props)
{ {
return rpcs3::hash_struct<vk::pipeline_props>(props); return rpcs3::hash_struct<vk::pipeline_props>(props);
} }
u64 get_hash(const RSXVertexProgram &prog) u64 get_hash(const RSXVertexProgram& prog)
{ {
return program_hash_util::vertex_program_utils::get_vertex_program_ucode_hash(prog); return program_hash_util::vertex_program_utils::get_vertex_program_ucode_hash(prog);
} }
u64 get_hash(const RSXFragmentProgram &prog) u64 get_hash(const RSXFragmentProgram& prog)
{ {
return program_hash_util::fragment_program_utils::get_fragment_program_ucode_hash(prog); return program_hash_util::fragment_program_utils::get_fragment_program_ucode_hash(prog);
} }
template <typename... Args> template <typename... Args>
void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, vk::pipeline_props &props, Args&& ...args) void add_pipeline_entry(RSXVertexProgram& vp, RSXFragmentProgram& fp, vk::pipeline_props& props, Args&& ...args)
{ {
vp.skip_vertex_input_check = true; vp.skip_vertex_input_check = true;
get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...); get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...);
} }
void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp) void preload_programs(RSXVertexProgram& vp, RSXFragmentProgram& fp)
{ {
vp.skip_vertex_input_check = true; vp.skip_vertex_input_check = true;
search_vertex_program(vp); search_vertex_program(vp);
@ -225,4 +106,5 @@ struct VKProgramBuffer : public program_state_cache<VKTraits>
{ {
return m_cache_miss_flag; return m_cache_miss_flag;
} }
}; };
}

View File

@ -484,7 +484,6 @@ namespace vk
VkPipelineColorBlendStateCreateInfo cs = properties.state.cs; VkPipelineColorBlendStateCreateInfo cs = properties.state.cs;
cs.pAttachments = properties.state.att_state; cs.pAttachments = properties.state.att_state;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {}; VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi; info.pVertexInputState = &vi;
@ -502,8 +501,9 @@ namespace vk
info.basePipelineHandle = VK_NULL_HANDLE; info.basePipelineHandle = VK_NULL_HANDLE;
info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key);
CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &info, NULL, &pipeline)); auto compiler = vk::get_pipe_compiler();
return new vk::glsl::program(m_device, pipeline, m_shared_pipeline_layout, m_vs_inputs, m_fs_inputs); auto program = compiler->compile(info, m_shared_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs);
return program.release();
} }
void shader_interpreter::update_fragment_textures(const std::array<VkDescriptorImageInfo, 68>& sampled_images, VkDescriptorSet descriptor_set) void shader_interpreter::update_fragment_textures(const std::array<VkDescriptorImageInfo, 68>& sampled_images, VkDescriptorSet descriptor_set)

View File

@ -3,6 +3,7 @@
#include "VKVertexProgram.h" #include "VKVertexProgram.h"
#include "VKFragmentProgram.h" #include "VKFragmentProgram.h"
#include "VKRenderPass.h" #include "VKRenderPass.h"
#include "VKPipelineCompiler.h"
#include "../Common/TextGlyphs.h" #include "../Common/TextGlyphs.h"
namespace vk namespace vk
@ -176,7 +177,6 @@ namespace vk
VkPipelineDepthStencilStateCreateInfo ds = {}; VkPipelineDepthStencilStateCreateInfo ds = {};
ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {}; VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi; info.pVertexInputState = &vi;
@ -194,10 +194,8 @@ namespace vk
info.basePipelineHandle = VK_NULL_HANDLE; info.basePipelineHandle = VK_NULL_HANDLE;
info.renderPass = m_render_pass; info.renderPass = m_render_pass;
CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); auto compiler = vk::get_pipe_compiler();
m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE);
const std::vector<vk::glsl::program_input> unused;
m_program = std::make_unique<vk::glsl::program>(static_cast<VkDevice>(dev), pipeline, m_pipeline_layout, unused, unused);
} }
void load_program(vk::command_buffer &cmd, float scale_x, float scale_y, const float *offsets, size_t nb_offsets, std::array<float, 4> color) void load_program(vk::command_buffer &cmd, float scale_x, float scale_y, const float *offsets, size_t nb_offsets, std::array<float, 4> color)

View File

@ -645,7 +645,7 @@ namespace rsx
if (!fs::is_file(fp_name)) if (!fs::is_file(fp_name))
{ {
fs::file(fp_name, fs::rewrite).write(fp.addr, fp.ucode_length); fs::file(fp_name, fs::rewrite).write(fp.get_data(), fp.ucode_length);
} }
if (!fs::is_file(vp_name)) if (!fs::is_file(vp_name))
@ -700,7 +700,7 @@ namespace rsx
{ {
std::lock_guard<std::mutex> lock(fpd_mutex); std::lock_guard<std::mutex> lock(fpd_mutex);
fragment_program_data[program_hash] = data; fragment_program_data[program_hash] = data;
fp.addr = fragment_program_data[program_hash].data(); fp.data = fragment_program_data[program_hash].data();
} }
fp.ucode_length = ::size32(data); fp.ucode_length = ::size32(data);

View File

@ -152,6 +152,7 @@ struct cfg_root : cfg::node
cfg::_int<50, 800> resolution_scale_percent{ this, "Resolution Scale", 100 }; cfg::_int<50, 800> resolution_scale_percent{ this, "Resolution Scale", 100 };
cfg::_int<0, 16> anisotropic_level_override{ this, "Anisotropic Filter Override", 0, true }; cfg::_int<0, 16> anisotropic_level_override{ this, "Anisotropic Filter Override", 0, true };
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 }; cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 };
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true }; cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true }; cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways

View File

@ -65,8 +65,6 @@
</ImportGroup> </ImportGroup>
<PropertyGroup Label="UserMacros" /> <PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">
<ClCompile />
<ClCompile />
<ClCompile> <ClCompile>
<Optimization>MaxSpeed</Optimization> <Optimization>MaxSpeed</Optimization>
</ClCompile> </ClCompile>
@ -80,6 +78,7 @@
<ClInclude Include="Emu\RSX\GL\GLCompute.h" /> <ClInclude Include="Emu\RSX\GL\GLCompute.h" />
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" /> <ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" /> <ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLPipelineCompiler.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" /> <ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" /> <ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" />
<ClInclude Include="Emu\RSX\GL\GLFragmentProgram.h" /> <ClInclude Include="Emu\RSX\GL\GLFragmentProgram.h" />
@ -99,6 +98,7 @@
<ClCompile Include="Emu\RSX\GL\GLDraw.cpp" /> <ClCompile Include="Emu\RSX\GL\GLDraw.cpp" />
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" /> <ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" /> <ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" /> <ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" /> <ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" /> <ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />

View File

@ -13,6 +13,7 @@
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" /> <ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" /> <ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" /> <ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" /> <ClInclude Include="Emu\RSX\GL\GLTexture.h" />
@ -31,5 +32,6 @@
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" /> <ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" /> <ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
<ClInclude Include="Emu\RSX\GL\GLCompute.h" /> <ClInclude Include="Emu\RSX\GL\GLCompute.h" />
<ClInclude Include="Emu\RSX\GL\GLPipelineCompiler.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -33,6 +33,7 @@
<ClInclude Include="Emu\RSX\VK\VKGSRender.h" /> <ClInclude Include="Emu\RSX\VK\VKGSRender.h" />
<ClInclude Include="Emu\RSX\VK\VKHelpers.h" /> <ClInclude Include="Emu\RSX\VK\VKHelpers.h" />
<ClInclude Include="Emu\RSX\VK\VKOverlays.h" /> <ClInclude Include="Emu\RSX\VK\VKOverlays.h" />
<ClInclude Include="Emu\RSX\VK\VKPipelineCompiler.h" />
<ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" /> <ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" />
<ClInclude Include="Emu\RSX\VK\VKQueryPool.h" /> <ClInclude Include="Emu\RSX\VK\VKQueryPool.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderPass.h" /> <ClInclude Include="Emu\RSX\VK\VKRenderPass.h" />
@ -55,6 +56,7 @@
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" /> <ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" /> <ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" /> <ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPipelineCompiler.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" /> <ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" /> <ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" /> <ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />

View File

@ -21,6 +21,7 @@
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" /> <ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" /> <ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" /> <ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPipelineCompiler.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" /> <ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
@ -44,5 +45,6 @@
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" /> <ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" /> <ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
<ClInclude Include="Emu\RSX\VK\VKQueryPool.h" /> <ClInclude Include="Emu\RSX\VK\VKQueryPool.h" />
<ClInclude Include="Emu\RSX\VK\VKPipelineCompiler.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>