diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index d461cbfd2..52e2b9c4b 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -18,9 +18,7 @@ namespace gl4 { GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count) - : Shader(shader_type, data_hash, dword_ptr, dword_count), - program_(0), - vao_(0) {} + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} GL4Shader::~GL4Shader() { glDeleteProgram(program_); @@ -28,21 +26,29 @@ GL4Shader::~GL4Shader() { } bool GL4Shader::Prepare() { - if (!Shader::Prepare()) { - return false; - } - // Build static vertex array descriptor. if (!PrepareVertexArrayObject()) { XELOGE("Unable to prepare vertex shader array object"); return false; } - if (!CompileProgram()) { - return false; + bool success = true; + if (!CompileShader()) { + host_error_log_ = GetShaderInfoLog(); + success = false; + } + if (success && !LinkProgram()) { + host_error_log_ = GetProgramInfoLog(); + success = false; } - return true; + if (success) { + host_binary_ = GetBinary(); + host_disassembly_ = GetHostDisasmNV(host_binary_); + } + is_valid_ = success; + + return success; } bool GL4Shader::PrepareVertexArrayObject() { @@ -52,23 +58,24 @@ bool GL4Shader::PrepareVertexArrayObject() { for (const auto& attrib : vertex_binding.attributes) { auto comp_count = GetVertexFormatComponentCount( attrib.fetch_instr.attributes.data_format); - GLenum comp_type; + GLenum comp_type = 0; bool is_signed = attrib.fetch_instr.attributes.is_signed; switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_8_8_8_8: comp_type = is_signed ? GL_BYTE : GL_UNSIGNED_BYTE; break; case VertexFormat::k_2_10_10_10: - comp_type = is_signed ? GL_INT_2_10_10_10_REV - : GL_UNSIGNED_INT_2_10_10_10_REV; + comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT; + comp_count = 1; break; case VertexFormat::k_10_11_11: - // assert_false(is_signed); - XELOGW("Signed k_10_11_11 vertex format not supported by GL"); - comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV; + comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT; + comp_count = 1; + break; + case VertexFormat::k_11_11_10: + assert_true(is_signed); + comp_type = is_signed ? GL_R11F_G11F_B10F : 0; break; - /*case VertexFormat::k_11_11_10: - break;*/ case VertexFormat::k_16_16: comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; @@ -120,83 +127,137 @@ bool GL4Shader::PrepareVertexArrayObject() { return true; } -bool GL4Shader::CompileProgram() { +bool GL4Shader::CompileShader() { assert_zero(program_); - // Give source to GL. - auto source_str = GetTranslatedBinaryString(); - auto source_str_ptr = source_str.c_str(); - program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex - ? GL_VERTEX_SHADER - : GL_FRAGMENT_SHADER, - 1, &source_str_ptr); - if (!program_) { - XELOGE("Unable to create shader program"); + shader_ = + glCreateShader(shader_type_ == ShaderType::kVertex ? GL_VERTEX_SHADER + : GL_FRAGMENT_SHADER); + if (!shader_) { + XELOGE("OpenGL could not create a shader object!"); return false; } - // Output info log. - // log_length includes the null character. - GLint log_length = 0; - glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length); - std::string info_log; - if (log_length > 0) { - info_log.resize(log_length - 1); - glGetProgramInfoLog(program_, log_length, &log_length, &info_log[0]); + auto source_str = GetTranslatedBinaryString(); + auto source_str_ptr = source_str.c_str(); + GLint source_length = GLint(source_str.length()); + glShaderSource(shader_, 1, &source_str_ptr, &source_length); + glCompileShader(shader_); + + GLint status = 0; + glGetShaderiv(shader_, GL_COMPILE_STATUS, &status); + + return status == GL_TRUE; +} + +bool GL4Shader::LinkProgram() { + program_ = glCreateProgram(); + if (!program_) { + XELOGE("OpenGL could not create a shader program!"); + return false; } - if (!info_log.empty()) { - XELOGD("Shader log: %s", info_log.c_str()); + glAttachShader(program_, shader_); + + // Enable TFB + if (shader_type_ == ShaderType::kVertex) { + const GLchar* feedbackVaryings = "gl_Position"; + glTransformFeedbackVaryings(program_, 1, &feedbackVaryings, + GL_SEPARATE_ATTRIBS); } - // Get error log, if we failed to link. + glProgramParameteri(program_, GL_PROGRAM_SEPARABLE, GL_TRUE); + glLinkProgram(program_); + GLint link_status = 0; glGetProgramiv(program_, GL_LINK_STATUS, &link_status); if (!link_status) { - host_error_log_ = std::move(info_log); assert_always("Unable to link generated shader"); return false; } - // Get program binary, if it's available. - GLint binary_length = 0; - glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length); - if (binary_length) { - host_binary_.resize(binary_length); - GLenum binary_format; - glGetProgramBinary(program_, binary_length, &binary_length, &binary_format, - host_binary_.data()); - - // If we are on nvidia, we can find the disassembly string. - // I haven't been able to figure out from the format how to do this - // without a search like this. - const char* disasm_start = nullptr; - size_t search_offset = 0; - char* search_start = reinterpret_cast(host_binary_.data()); - while (true) { - auto p = reinterpret_cast( - memchr(host_binary_.data() + search_offset, '!', - host_binary_.size() - search_offset)); - if (!p) { - break; - } - if (p[0] == '!' && p[1] == '!' && p[2] == 'N' && p[3] == 'V') { - disasm_start = p; - break; - } - search_offset = p - search_start; - ++search_offset; - } - if (disasm_start) { - host_disassembly_ = std::string(disasm_start); - } else { - host_disassembly_ = std::string("Shader disassembly not available."); - } - } - return true; } +std::string GL4Shader::GetShaderInfoLog() { + if (!shader_) { + return "GL4Shader::GetShaderInfoLog(): Program is NULL"; + } + + std::string log; + GLint log_length = 0; + glGetShaderiv(shader_, GL_INFO_LOG_LENGTH, &log_length); + if (log_length > 0) { + log.resize(log_length - 1); + glGetShaderInfoLog(shader_, log_length, &log_length, &log[0]); + } + + return log; +} + +std::string GL4Shader::GetProgramInfoLog() { + if (!program_) { + return "GL4Shader::GetProgramInfoLog(): Program is NULL"; + } + + std::string log; + GLint log_length = 0; + glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length); + if (log_length > 0) { + log.resize(log_length - 1); + glGetProgramInfoLog(program_, log_length, &log_length, &log[0]); + } + + return log; +} + +std::vector GL4Shader::GetBinary() { + std::vector binary; + + // Get program binary, if it's available. + GLint binary_length = 0; + glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length); + if (binary_length) { + binary.resize(binary_length); + GLenum binary_format; + glGetProgramBinary(program_, binary_length, &binary_length, &binary_format, + binary.data()); + } + + return binary; +} + +std::string GL4Shader::GetHostDisasmNV(const std::vector& binary) { + // If we are on nvidia, we can find the disassembly string. + // I haven't been able to figure out from the format how to do this + // without a search like this. + std::string disasm; + + const char* disasm_start = nullptr; + size_t search_offset = 0; + const char* search_start = reinterpret_cast(binary.data()); + while (true) { + auto p = reinterpret_cast(memchr( + binary.data() + search_offset, '!', binary.size() - search_offset)); + if (!p) { + break; + } + if (p[0] == '!' && p[1] == '!' && p[2] == 'N' && p[3] == 'V') { + disasm_start = p; + break; + } + search_offset = p - search_start; + ++search_offset; + } + if (disasm_start) { + disasm = std::string(disasm_start); + } else { + disasm = std::string("Shader disassembly not available."); + } + + return disasm; +} + } // namespace gl4 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_shader.h b/src/xenia/gpu/gl4/gl4_shader.h index 98346f122..96a62f718 100644 --- a/src/xenia/gpu/gl4/gl4_shader.h +++ b/src/xenia/gpu/gl4/gl4_shader.h @@ -26,16 +26,24 @@ class GL4Shader : public Shader { ~GL4Shader() override; GLuint program() const { return program_; } + GLuint shader() const { return shader_; } GLuint vao() const { return vao_; } - bool Prepare() override; + bool Prepare(); protected: bool PrepareVertexArrayObject(); - bool CompileProgram(); + bool CompileShader(); + bool LinkProgram(); - GLuint program_; - GLuint vao_; + std::string GetShaderInfoLog(); + std::string GetProgramInfoLog(); + std::vector GetBinary(); + static std::string GetHostDisasmNV(const std::vector& binary); + + GLuint program_ = 0; + GLuint shader_ = 0; + GLuint vao_ = 0; }; } // namespace gl4 diff --git a/src/xenia/gpu/glsl_shader_translator.cc b/src/xenia/gpu/glsl_shader_translator.cc index 442547a9c..cf050038a 100644 --- a/src/xenia/gpu/glsl_shader_translator.cc +++ b/src/xenia/gpu/glsl_shader_translator.cc @@ -25,7 +25,7 @@ constexpr int kMaxTemporaryRegisters = 64; source_.Append(depth_prefix_); \ source_.AppendFormat(__VA_ARGS__) -const char* GetVertexFormatTypeName(VertexFormat format) { +const char* GetVertexFormatTypeName(VertexFormat format, bool is_signed) { switch (format) { case VertexFormat::k_32: case VertexFormat::k_32_FLOAT: @@ -36,11 +36,13 @@ const char* GetVertexFormatTypeName(VertexFormat format) { case VertexFormat::k_32_32_FLOAT: return "vec2"; case VertexFormat::k_10_11_11: + return is_signed ? "int" : "uint"; + case VertexFormat::k_2_10_10_10: + return is_signed ? "int" : "uint"; case VertexFormat::k_11_11_10: case VertexFormat::k_32_32_32_FLOAT: return "vec3"; case VertexFormat::k_8_8_8_8: - case VertexFormat::k_2_10_10_10: case VertexFormat::k_16_16_16_16: case VertexFormat::k_32_32_32_32: case VertexFormat::k_16_16_16_16_FLOAT: @@ -189,6 +191,39 @@ out gl_PerVertex { }; layout(location = 0) flat out uint draw_id; layout(location = 1) out VertexData vtx; + +vec3 get_10_11_11_u(const uint data_in) { + vec3 vec; + vec.x = bitfieldExtract(data_in, 0, 10); + vec.y = bitfieldExtract(data_in, 10, 11); + vec.z = bitfieldExtract(data_in, 21, 11); + return vec; +} +vec3 get_10_11_11_s(const int data_in) { + vec3 vec; + vec.x = bitfieldExtract(data_in, 0, 10); + vec.y = bitfieldExtract(data_in, 10, 11); + vec.z = bitfieldExtract(data_in, 21, 11); + return vec; +} + +vec4 get_2_10_10_10_u(const uint data_in) { + vec4 vec; + vec.x = bitfieldExtract(data_in, 0, 10); + vec.y = bitfieldExtract(data_in, 10, 10); + vec.z = bitfieldExtract(data_in, 20, 10); + vec.w = bitfieldExtract(data_in, 30, 2 ); + return vec; +} +vec4 get_2_10_10_10_s(const int data_in) { + vec4 vec; + vec.x = bitfieldExtract(data_in, 0, 10); + vec.y = bitfieldExtract(data_in, 10, 10); + vec.z = bitfieldExtract(data_in, 20, 10); + vec.w = bitfieldExtract(data_in, 30, 2 ); + return vec; +} + vec4 applyTransform(const in StateData state, vec4 pos) { if (state.vtx_fmt.w == 0.0) { // w is 1/W0, so fix it. @@ -262,7 +297,8 @@ void main() { } defined_locations.insert(key); const char* type_name = - GetVertexFormatTypeName(attrib.fetch_instr.attributes.data_format); + GetVertexFormatTypeName(attrib.fetch_instr.attributes.data_format, + attrib.fetch_instr.attributes.is_signed); EmitSource("layout(location = %d) in %s vf%u_%d;\n", attrib.attrib_index, type_name, binding.fetch_constant, attrib.fetch_instr.attributes.offset); @@ -557,16 +593,29 @@ void GlslShaderTranslator::ProcessVertexFetchInstruction( } switch (instr.opcode) { - case FetchOpcode::kVertexFetch: + case FetchOpcode::kVertexFetch: { EmitSourceDepth("pv."); for (int i = 0; i < GetVertexFormatComponentCount(instr.attributes.data_format); ++i) { EmitSource("%c", GetCharForComponentIndex(i)); } - EmitSource(" = vf%u_%d;\n", instr.operands[1].storage_index, - instr.attributes.offset); - break; + + auto format = instr.attributes.data_format; + if (format == VertexFormat::k_10_11_11) { + // GL doesn't support this format as a fetch type, so convert it. + EmitSource(" = get_10_11_11_%c(vf%u_%d);\n", + instr.attributes.is_signed ? 's' : 'u', + instr.operands[1].storage_index, instr.attributes.offset); + } else if (format == VertexFormat::k_2_10_10_10) { + EmitSource(" = get_2_10_10_10_%c(vf%u_%d);\n", + instr.attributes.is_signed ? 's' : 'u', + instr.operands[1].storage_index, instr.attributes.offset); + } else { + EmitSource(" = vf%u_%d;\n", instr.operands[1].storage_index, + instr.attributes.offset); + } + } break; default: assert_always(); break; @@ -834,7 +883,7 @@ void GlslShaderTranslator::EmitStoreResult(const InstructionResult& result, EmitSourceDepth("gl_FragDepth"); break; case InstructionStorageTarget::kNone: - break; + return; } if (uses_storage_index) { switch (result.storage_addressing_mode) {