rsx/gl: Implement variable path interpreter for optimal performance

Authored by kd-11 on 2020-04-19 20:23:12 +03:00, committed by Ivan
parent 930bc9179d
commit bc5c4c9205
5 changed files with 285 additions and 109 deletions

View File

@@ -223,6 +223,8 @@ vec4 read_cond()
return shuffle(cc[GET_BITS(1, 31, 1)], GET_BITS(1, 21, 8));
}
#ifdef WITH_TEXTURES
vec4 _texture(in vec4 coord, float bias)
{
const uint tex_num = GET_BITS(0, 17, 4);
@@ -275,6 +277,8 @@ vec4 _textureLod(in vec4 coord, float lod)
return vec4(0.);
}
#endif
void write_dst(in vec4 value)
{
bvec4 inst_mask = bvec4(
@@ -423,8 +427,11 @@ void main()
value = sin(s0.xxxx); break;
case RSX_FP_OPCODE_NRM:
value.xyz = normalize(s0.xyz); break;
#ifdef WITH_TEXTURES
case RSX_FP_OPCODE_TEX:
value = _texture(s0, 0.f); break;
#endif
default:
handled = false;
}
@@ -470,6 +477,8 @@ void main()
value = s0 / s1.xxxx; break;
case RSX_FP_OPCODE_DIVSQ:
value = s0 * inversesqrt(s1.xxxx); break;
#ifdef WITH_TEXTURES
//case RSX_FP_OPCODE_TXP:
//case RSX_FP_OPCODE_TXD:
case RSX_FP_OPCODE_TXL:
@@ -478,6 +487,7 @@ void main()
value = _texture(s0, s1.x); break;
//case RSX_FP_OPCODE_TEXBEM:
//case RSX_FP_OPCODE_TXPBEM:
#endif
default:
handled = false;
}
@@ -529,29 +539,41 @@ void main()
write_dst(value);
}
if (!shader_attribute(CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS))
{
#ifdef WITH_HALF_OUTPUT_REGISTER
ocol0 = regs16[0];
ocol1 = regs16[4];
ocol2 = regs16[6];
ocol3 = regs16[8];
}
else
{
#else
ocol0 = regs32[0];
ocol1 = regs32[2];
ocol2 = regs32[3];
ocol3 = regs32[4];
}
#endif
if (shader_attribute(CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT))
{
gl_FragDepth = regs32[1].z;
}
else
{
gl_FragDepth = gl_FragCoord.z;
}
#ifdef WITH_DEPTH_EXPORT
gl_FragDepth = regs32[1].z;
#endif
// Typically an application will pick one strategy and stick with it
#ifdef ALPHA_TEST_GEQUAL
if (ocol0.a < alpha_ref) discard; // gequal
#endif
#ifdef ALPHA_TEST_GREATER
if (ocol0.a <= alpha_ref) discard; // greater
#endif
#ifdef ALPHA_TEST_LESS
if (ocol0.a >= alpha_ref) discard; // less
#endif
#ifdef ALPHA_TEST_LEQUAL
if (ocol0.a > alpha_ref) discard; // lequal
#endif
#ifdef ALPHA_TEST_EQUAL
if (ocol0.a != alpha_ref) discard; // equal
#endif
#ifdef ALPHA_TEST_NEQUAL
if (ocol0.a == alpha_ref) discard; // nequal
#endif
}
)"
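
The hunks above turn the fragment interpreter from a single do-everything shader into a template: texture sampling, the half-float versus full-float output register selection, depth export and the alpha test are now wrapped in preprocessor guards, so every compiled variant carries only the paths the active fragment program actually needs. As a minimal sketch of the idea, assuming a hypothetical helper name (the commit's real logic lives in shader_interpreter::build_fs() further down), the host simply prepends the matching #define lines before compiling; the flag values and macro names below are taken from the other files in this diff:

#include <cstdint>
#include <string>

// Flag values mirror program_common::interpreter::compiler_option (see next file).
constexpr uint64_t OPT_ENABLE_TEXTURES      = 1;
constexpr uint64_t OPT_ENABLE_DEPTH_EXPORT  = 2;
constexpr uint64_t OPT_ENABLE_F32_EXPORT    = 4;
constexpr uint64_t OPT_ENABLE_ALPHA_TEST_GE = 8;

// Hypothetical helper: assemble the #define header that selects the interpreter's paths.
std::string interpreter_defines(uint64_t opt)
{
    std::string header;
    if (opt & OPT_ENABLE_TEXTURES)      header += "#define WITH_TEXTURES\n";
    if (opt & OPT_ENABLE_DEPTH_EXPORT)  header += "#define WITH_DEPTH_EXPORT\n";
    if (!(opt & OPT_ENABLE_F32_EXPORT)) header += "#define WITH_HALF_OUTPUT_REGISTER\n";
    if (opt & OPT_ENABLE_ALPHA_TEST_GE) header += "#define ALPHA_TEST_GEQUAL\n";
    // The remaining ALPHA_TEST_* flags follow the same pattern.
    return header; // prepended ahead of get_fragment_interpreter()
}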

View File

@@ -5,6 +5,19 @@ namespace program_common
{
namespace interpreter
{
enum compiler_option
{
COMPILER_OPT_ENABLE_TEXTURES = 1,
COMPILER_OPT_ENABLE_DEPTH_EXPORT = 2,
COMPILER_OPT_ENABLE_F32_EXPORT = 4,
COMPILER_OPT_ENABLE_ALPHA_TEST_GE = 8,
COMPILER_OPT_ENABLE_ALPHA_TEST_G = 16,
COMPILER_OPT_ENABLE_ALPHA_TEST_LE = 32,
COMPILER_OPT_ENABLE_ALPHA_TEST_L = 64,
COMPILER_OPT_ENABLE_ALPHA_TEST_EQ = 128,
COMPILER_OPT_ENABLE_ALPHA_TEST_NE = 256,
};
static std::string get_vertex_interpreter()
{
const char* s =
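
Each compiler_option above occupies its own bit, so a variant is fully described by OR-ing together the flags that apply, and that mask doubles as the lookup key of the per-variant program cache (m_program_cache in the GL backend below). A small sketch of the pattern, using illustrative names rather than the commit's own types:

#include <cstdint>
#include <memory>
#include <unordered_map>

struct cached_variant
{
    // Holds the compiled fragment shader, linked program and texture allocator in the real code.
};

std::unordered_map<uint64_t, std::unique_ptr<cached_variant>> variant_cache;

cached_variant* get_variant(uint64_t option_mask)
{
    // One specialized program per unique option mask, built lazily on first use.
    auto& slot = variant_cache[option_mask];
    if (!slot)
    {
        slot = std::make_unique<cached_variant>(); // real code: build_program(option_mask)
    }
    return slot.get();
}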

View File

@@ -621,13 +621,21 @@ bool GLGSRender::load_program()
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
current_fragment_program.unnormalized_coords = 0; //unused
}
else if (m_program &&
(m_program != m_shader_interpreter.get() || interpreter_mode == shader_interpreter_mode::forced))
else if (m_program)
{
return true;
if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
{
return true;
}
if (interpreter_mode == shader_interpreter_mode::forced)
{
m_program = m_shader_interpreter.get(current_fp_metadata);
return true;
}
}
auto old_program = m_program;
const bool was_interpreter = m_shader_interpreter.is_interpreter(m_program);
if (interpreter_mode != shader_interpreter_mode::forced) [[likely]]
{
void* pipeline_properties = nullptr;
@@ -660,12 +668,16 @@ bool GLGSRender::load_program()
m_program->sync();
}
}
else
{
m_program = nullptr;
}
if (!m_program && interpreter_mode != shader_interpreter_mode::disabled)
{
// Fall back to interpreter
m_program = m_shader_interpreter.get();
if (old_program != m_program)
m_program = m_shader_interpreter.get(current_fp_metadata);
if (was_interpreter != m_shader_interpreter.is_interpreter(m_program))
{
// Program has changed, reupload
m_interpreter_state = rsx::invalidate_pipeline_bits;
@@ -689,7 +701,7 @@ void GLGSRender::load_program_env()
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
const bool update_instruction_buffers = (!!m_interpreter_state && m_program == m_shader_interpreter.get());
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
m_program->use();
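
Read straight through, the new load_program() selection logic boils down to: prefer the compiled native program, fall back to (or stay on) an interpreter variant, and request a re-upload whenever execution switches between the two classes of program. The sketch below is only an illustration of that flow with simplified stand-in types, not the commit's code:

#include <cstdint>

enum class interp_mode { disabled, enabled, forced };

struct program {};

struct interpreter_stub
{
    program variant;                                     // real code caches one program per option mask
    program* get() { return &variant; }                  // real signature: get(current_fp_metadata)
    bool is_interpreter(const program* p) const { return p == &variant; }
};

// Returns the program to bind; sets 'reupload' when the instruction buffers must be
// re-uploaded (the commit does this via m_interpreter_state = rsx::invalidate_pipeline_bits).
program* select_program(program* native, program* current, interpreter_stub& interp,
                        interp_mode mode, bool& reupload)
{
    const bool was_interpreter = interp.is_interpreter(current);

    program* next = nullptr;
    if (mode != interp_mode::forced)
    {
        next = native;                                   // prefer the compiled native program
    }
    if (!next && mode != interp_mode::disabled)
    {
        next = interp.get();                             // fall back to the interpreter
    }

    reupload = (was_interpreter != interp.is_interpreter(next));
    return next;
}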

View File

@@ -44,46 +44,70 @@ namespace gl
void shader_interpreter::create()
{
texture_pools[0].create(shader::type::vertex);
texture_pools[1].create(shader::type::fragment);
build_vs();
build_fs();
program_handle.create().
attach(vs).
attach(fs).
link();
program_handle.uniforms[0] = GL_STREAM_BUFFER_START + 0;
program_handle.uniforms[1] = GL_STREAM_BUFFER_START + 1;
// Initialize texture bindings
int assigned = 0;
auto& allocator = texture_pools[1];
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
for (int i = 0; i < 4; ++i)
{
for (int j = 0; j < allocator.pools[i].pool_size; ++j)
{
allocator.pools[i].allocate(assigned++);
}
program_handle.uniforms[type_names[i]] = allocator.pools[i].allocated;
}
build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES);
build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES | ::program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT);
}
void shader_interpreter::destroy()
{
program_handle.remove();
vs.remove();
fs.remove();
for (auto& prog : m_program_cache)
{
prog.second->fs.remove();
prog.second->prog.remove();
}
m_vs.remove();
}
glsl::program* shader_interpreter::get()
glsl::program* shader_interpreter::get(const interpreter::program_metadata& metadata)
{
return &program_handle;
// Build options
u64 opt = 0;
if (rsx::method_registers.alpha_test_enabled()) [[unlikely]]
{
switch (rsx::method_registers.alpha_func())
{
case rsx::comparison_function::always:
break;
case rsx::comparison_function::never:
return nullptr;
case rsx::comparison_function::greater_or_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
break;
case rsx::comparison_function::greater:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G;
break;
case rsx::comparison_function::less_or_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE;
break;
case rsx::comparison_function::less:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L;
break;
case rsx::comparison_function::equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ;
break;
case rsx::comparison_function::not_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
break;
}
}
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT;
if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES;
if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]]
{
m_current_interpreter = it->second.get();
}
else
{
m_current_interpreter = build_program(opt);
}
return &m_current_interpreter->prog;
}
void shader_interpreter::build_vs()
@@ -124,43 +148,47 @@ namespace gl
builder << program_common::interpreter::get_vertex_interpreter();
const std::string s = builder.str();
vs.create(glsl::shader::type::vertex);
vs.source(s);
vs.compile();
m_vs.create(glsl::shader::type::vertex);
m_vs.source(s);
m_vs.compile();
}
void shader_interpreter::build_fs()
void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data)
{
// Allocate TIUs
auto& allocator = texture_pools[1];
if (allocator.max_image_units >= 32)
auto& allocator = prog_data.allocator;
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
{
// 16 + 4 + 4 + 4
allocator.allocate(4); // 1D
allocator.allocate(16); // 2D
allocator.allocate(4); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 24)
{
// 16 + 4 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(16); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 16)
{
// 10 + 2 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(10); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(2); // 3D
}
else
{
// Unusable
rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
allocator.create(glsl::shader::type::fragment);
if (allocator.max_image_units >= 32)
{
// 16 + 4 + 4 + 4
allocator.allocate(4); // 1D
allocator.allocate(16); // 2D
allocator.allocate(4); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 24)
{
// 16 + 4 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(16); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 16)
{
// 10 + 2 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(10); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(2); // 3D
}
else
{
// Unusable
rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
}
}
::glsl::shader_properties properties{};
@@ -182,18 +210,67 @@ namespace gl
::glsl::insert_subheader_block(builder);
comp.insertConstants(builder);
const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
for (int i = 0; i < 4; ++i)
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE)
{
builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
builder << "#define ALPHA_TEST_GEQUAL\n";
}
builder << "\n"
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G)
{
builder << "#define ALPHA_TEST_GREATER\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE)
{
builder << "#define ALPHA_TEST_LEQUAL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L)
{
builder << "#define ALPHA_TEST_LESS\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ)
{
builder << "#define ALPHA_TEST_EQUAL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE)
{
builder << "#define ALPHA_TEST_NEQUAL\n";
}
if (!(compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT))
{
builder << "#define WITH_HALF_OUTPUT_REGISTER\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT)
{
builder << "#define WITH_DEPTH_EXPORT\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
{
builder << "#define WITH_TEXTURES\n\n";
const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
for (int i = 0; i < 4; ++i)
{
builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
}
builder << "\n"
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
}
else if (compiler_options)
{
builder << "\n";
}
builder <<
"layout(std430, binding =" << GL_INTERPRETER_FRAGMENT_BLOCK << ") readonly restrict buffer FragmentInstructionBlock\n"
@@ -211,22 +288,62 @@ namespace gl
builder << program_common::interpreter::get_fragment_interpreter();
const std::string s = builder.str();
fs.create(glsl::shader::type::fragment);
fs.source(s);
fs.compile();
prog_data.fs.create(glsl::shader::type::fragment);
prog_data.fs.source(s);
prog_data.fs.compile();
}
interpreter::cached_program* shader_interpreter::build_program(u64 compiler_options)
{
auto data = new interpreter::cached_program();
build_fs(compiler_options, *data);
data->prog.create().
attach(m_vs).
attach(data->fs).
link();
data->prog.uniforms[0] = GL_STREAM_BUFFER_START + 0;
data->prog.uniforms[1] = GL_STREAM_BUFFER_START + 1;
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
{
// Initialize texture bindings
int assigned = 0;
auto& allocator = data->allocator;
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
for (int i = 0; i < 4; ++i)
{
for (int j = 0; j < allocator.pools[i].pool_size; ++j)
{
allocator.pools[i].allocate(assigned++);
}
data->prog.uniforms[type_names[i]] = allocator.pools[i].allocated;
}
}
m_program_cache[compiler_options].reset(data);
return data;
}
bool shader_interpreter::is_interpreter(const glsl::program* program)
{
return (program == &m_current_interpreter->prog);
}
void shader_interpreter::update_fragment_textures(
const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
u16 reference_mask, u32* out)
{
if (reference_mask == 0)
if (reference_mask == 0 || !m_current_interpreter)
{
return;
}
// Reset allocation
auto& allocator = texture_pools[1];
auto& allocator = m_current_interpreter->allocator;
for (unsigned i = 0; i < 4; ++i)
{
allocator.pools[i].num_used = 0;
@@ -306,9 +423,9 @@ namespace gl
}
}
if (allocator.pools[0].flags) program_handle.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
if (allocator.pools[1].flags) program_handle.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
if (allocator.pools[2].flags) program_handle.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
if (allocator.pools[3].flags) program_handle.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
if (allocator.pools[0].flags) m_current_interpreter->prog.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
if (allocator.pools[1].flags) m_current_interpreter->prog.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
if (allocator.pools[2].flags) m_current_interpreter->prog.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
if (allocator.pools[3].flags) m_current_interpreter->prog.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
}
}
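
The texture plumbing is split between the GLSL macros in the first file and update_fragment_textures() here: each referenced RSX texture is given a slot in the sampler array matching its type, the slot's texture image unit is written through the program's *_array uniforms, and texture_handles[] (filled through the 'out' pointer passed to update_fragment_textures) maps the RSX texture index to that slot, with values of 0xFF and above treated as not resident by IS_TEXTURE_RESIDENT. A simplified model of that indirection for a single sampler type follows; it is an assumption-laden sketch, and the names are illustrative rather than the commit's:

#include <array>
#include <cstdint>
#include <vector>

struct texture_indirection_model
{
    std::array<uint32_t, 16> texture_handles{}; // written out alongside the instruction data
    std::vector<int> sampler2D_array;           // mirrors 'uniform sampler2D sampler2D_array[N]'

    texture_indirection_model()
    {
        texture_handles.fill(0xFF);             // 0xFF and above -> texture not resident
    }

    void bind(int rsx_index, int image_unit)
    {
        // Hand the texture a slot in the sampler array and point that slot at a TIU.
        texture_handles[rsx_index] = static_cast<uint32_t>(sampler2D_array.size());
        sampler2D_array.push_back(image_unit);  // the real code uploads this via prog.uniforms
    }

    bool resident(int rsx_index) const          // IS_TEXTURE_RESIDENT(index) in the GLSL
    {
        return texture_handles[rsx_index] < 0xFF;
    }
};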

View File

@@ -1,10 +1,13 @@
#pragma once
#include "GLHelpers.h"
#include "../Common/ProgramStateCache.h"
namespace gl
{
namespace interpreter
{
using program_metadata = program_hash_util::fragment_program_utils::fragment_program_metadata;
enum class texture_pool_flags
{
dirty = 1
@@ -48,17 +51,25 @@ namespace gl
void create(::gl::glsl::shader::type domain);
void allocate(int size);
};
struct cached_program
{
glsl::shader fs;
glsl::program prog;
texture_pool_allocator allocator;
};
}
class shader_interpreter
{
glsl::shader vs;
glsl::shader fs;
glsl::program program_handle;
interpreter::texture_pool_allocator texture_pools[2];
glsl::shader m_vs;
std::unordered_map<u64, std::unique_ptr<interpreter::cached_program>> m_program_cache;
void build_vs();
void build_fs();
void build_fs(u64 compiler_options, interpreter::cached_program& prog_data);
interpreter::cached_program* build_program(u64 compiler_options);
interpreter::cached_program* m_current_interpreter = nullptr;
public:
void create();
@@ -66,6 +77,7 @@ namespace gl
void update_fragment_textures(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors, u16 reference_mask, u32* out);
glsl::program* get();
glsl::program* get(const interpreter::program_metadata& fp_metadata);
bool is_interpreter(const glsl::program* program);
};
}