From 8a70316275c8cc3e7310b1d612ecdb38077d0153 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 31 Dec 2014 02:50:21 -0800 Subject: [PATCH] Tweaking. --- src/poly/logging.cc | 9 ++-- src/xenia/gpu/gl4/command_processor.cc | 52 ++++++++++++++++++---- src/xenia/gpu/gl4/gl4_shader.cc | 6 +-- src/xenia/gpu/gl4/gl4_shader_translator.cc | 6 ++- src/xenia/gpu/gl4/texture_cache.cc | 4 +- src/xenia/gpu/shader.cc | 6 ++- src/xenia/gpu/xenos.h | 10 ++++- 7 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/poly/logging.cc b/src/poly/logging.cc index d7095d43c..b10760334 100644 --- a/src/poly/logging.cc +++ b/src/poly/logging.cc @@ -62,14 +62,15 @@ void format_log_line(char* buffer, size_t buffer_count, const char* file_path, } } +thread_local char log_buffer[2048]; + void log_line(const char* file_path, const uint32_t line_number, const char level_char, const char* fmt, ...) { // SCOPE_profile_cpu_i("emu", "log_line"); - char buffer[2048]; va_list args; va_start(args, fmt); - format_log_line(buffer, poly::countof(buffer), file_path, line_number, + format_log_line(log_buffer, poly::countof(log_buffer), file_path, line_number, level_char, fmt, args); va_end(args); @@ -77,9 +78,9 @@ void log_line(const char* file_path, const uint32_t line_number, log_lock.lock(); } #if 0 // defined(OutputDebugString) - OutputDebugStringA(buffer); + OutputDebugStringA(log_buffer); #else - fprintf(stdout, "%s", buffer); + fprintf(stdout, "%s", log_buffer); fflush(stdout); #endif // OutputDebugString if (!FLAGS_fast_stdout) { diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index ba868daae..16e3f6c49 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -2167,7 +2167,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { auto copy_dest_format = static_cast((copy_dest_info >> 7) & 0x3F); uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7; - assert_true(copy_dest_number == 0); + // assert_true(copy_dest_number == 0); // ? uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F; assert_true(copy_dest_bias == 0); uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1; @@ -2233,6 +2233,14 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { read_format = copy_dest_swap ? GL_BGRA : GL_RGBA; read_type = GL_UNSIGNED_BYTE; break; + case ColorFormat::k_16_16_16_16_FLOAT: + read_format = GL_RGBA; + read_type = GL_HALF_FLOAT; + break; + case ColorFormat::k_32_FLOAT: + read_format = GL_R32F; + read_type = GL_FLOAT; + break; default: assert_unhandled_case(copy_dest_format); return false; @@ -2251,7 +2259,8 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE); break; default: - assert_unhandled_case(copy_dest_endian); + //assert_unhandled_case(copy_dest_endian); + glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE); return false; } @@ -2315,13 +2324,13 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { if (depth_clear_enabled) { // Clear the current depth buffer. // TODO(benvanik): verify format. - union { - uint32_t uint_value; - GLfloat float_value; - } depth = {copy_depth_clear & 0xFFFFFF00}; + GLfloat depth = {(copy_depth_clear & 0xFFFFFF00) / float(0xFFFFFF00)}; GLint stencil = copy_depth_clear & 0xFF; - glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL, - depth.float_value, stencil); + // HACK: this should work, but throws INVALID_ENUM on nvidia drivers. + //glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL, + // depth, stencil); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, source_framebuffer->framebuffer); + glClearBufferfi(GL_DEPTH_STENCIL, 0, depth, stencil); } return true; @@ -2361,6 +2370,29 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: internal_format = GL_RGBA8; break; + case ColorRenderTargetFormat::k_2_10_10_10: + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + internal_format = GL_RGB10_A2UI; + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + internal_format = GL_RGB10_A2; + break; + case ColorRenderTargetFormat::k_16_16: + internal_format = GL_RG16; + break; + case ColorRenderTargetFormat::k_16_16_FLOAT: + internal_format = GL_RG16F; + break; + case ColorRenderTargetFormat::k_16_16_16_16: + internal_format = GL_RGBA16; + break; + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + internal_format = GL_RGBA16F; + break; + case ColorRenderTargetFormat::k_32_FLOAT: + internal_format = GL_R32F; + break; default: assert_unhandled_case(format); return 0; @@ -2399,7 +2431,9 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, internal_format = GL_DEPTH24_STENCIL8; break; case DepthRenderTargetFormat::kD24FS8: - // TODO(benvanik): not supported in GL? + // TODO(benvanik): not supported in GL? + internal_format = GL_DEPTH24_STENCIL8; + break; default: assert_unhandled_case(format); return 0; diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index 9a46bd161..57ba19db6 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -127,11 +127,7 @@ bool GL4Shader::PreparePixelShader( "layout(location = 0) in VertexData vtx;\n" "layout(location = 0) out vec4 oC[4];\n" "void processFragment();\n" - "void main() {\n" - " for (int i = 0; i < oC.length(); ++i) {\n" - " oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n" - " }\n" + - (program_cntl.ps_export_depth ? " gl_FragDepth = 0.0;\n" : "") + + "void main() {\n" + " processFragment();\n" "}\n"; diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc index 3e0f33b35..194427a4b 100644 --- a/src/xenia/gpu/gl4/gl4_shader_translator.cc +++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc @@ -69,6 +69,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader( Reset(vertex_shader); // Normal shaders only, for now. + // TODO(benvanik): transform feedback/memexport. assert_true(program_cntl.vs_export_mode == 0); // Add vertex shader input. @@ -199,7 +200,8 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { Append("gl_Position"); break; case 63: - Append("gl_PointSize"); + // Write to t, as we need to splice just x out of it. + Append("t"); break; default: // Varying. @@ -242,6 +244,8 @@ void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, if (num == 61) { // gl_FragDepth handling to just get x from the temp result. Append(" gl_FragDepth = t.x;\n"); + } else if (num == 63) { + Append(" gl_PointSize = t.x;\n"); } else if (mask != 0xF) { // Masking. Append(" "); diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index 0aff172bd..56df1efc6 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -281,7 +281,7 @@ bool TextureCache::UploadTexture2D(GLuint texture, void* host_base, switch (texture_info.format) { case TextureFormat::k_8: internal_format = GL_R8; - format = GL_R; + format = GL_RED; type = GL_UNSIGNED_BYTE; break; case TextureFormat::k_1_5_5_5: @@ -341,7 +341,7 @@ bool TextureCache::UploadTexture2D(GLuint texture, void* host_base, break; case TextureFormat::k_32_FLOAT: internal_format = GL_R32F; - format = GL_R; + format = GL_RED; type = GL_FLOAT; break; case TextureFormat::k_32_32_FLOAT: diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index 42365b9f9..a3812b814 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -153,8 +153,6 @@ void Shader::GatherExec(const instr_cf_exec_t* cf) { } void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { - assert_true(shader_type_ == ShaderType::kVertex); - // dst_reg/dst_swiz // src_reg/src_swiz // format = a2xx_sq_surfaceformat @@ -166,6 +164,10 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { // num_format_all ? integer : fraction // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default + if (!vtx->must_be_one) { + return; + } + // Sometimes games have fetches that just produce constants. We can // ignore those. uint32_t dst_swiz = vtx->dst_swiz; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index e89e4ba97..0c3d75fca 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -81,7 +81,15 @@ enum class MsaaSamples : uint32_t { enum class ColorRenderTargetFormat : uint32_t { k_8_8_8_8 = 0, // D3DFMT_A8R8G8B8 (or ABGR?) k_8_8_8_8_GAMMA = 1, // D3DFMT_A8R8G8B8 with gamma correction - // ... + k_2_10_10_10 = 2, + k_2_10_10_10_FLOAT = 3, + k_16_16 = 4, + k_16_16_16_16 = 5, + k_16_16_FLOAT = 6, + k_16_16_16_16_FLOAT = 7, + k_2_10_10_10_unknown = 10, + k_2_10_10_10_FLOAT_unknown = 12, + k_32_FLOAT = 14, }; enum class DepthRenderTargetFormat : uint32_t {