From da3eef1019fb9899cdde34d423ce19c4a093bf60 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 25 Nov 2013 00:06:29 +0000 Subject: [PATCH 1/6] Fix the issue with COLOROUT not being defined anymore. Fix an issue where Mali shader compiler is idiotic in finding an overload for the mix function. --- Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp | 4 ++-- Source/Core/VideoBackends/OGL/Src/RasterFont.cpp | 2 +- Source/Core/VideoBackends/OGL/Src/Render.cpp | 2 +- Source/Core/VideoBackends/OGL/Src/TextureCache.cpp | 4 ++-- Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp | 6 +++--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp index 1530016336..1ce5e552a2 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp @@ -200,7 +200,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms char ps_rgba6_to_rgb8[] = "uniform sampler2DRect samp9;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "void main()\n" "{\n" " ivec4 src6 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 63.f));\n" @@ -214,7 +214,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms char ps_rgb8_to_rgba6[] = "uniform sampler2DRect samp9;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "void main()\n" "{\n" " ivec4 src8 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 255.f));\n" diff --git a/Source/Core/VideoBackends/OGL/Src/RasterFont.cpp b/Source/Core/VideoBackends/OGL/Src/RasterFont.cpp index 42b8963784..d3e4272d28 100644 --- a/Source/Core/VideoBackends/OGL/Src/RasterFont.cpp +++ b/Source/Core/VideoBackends/OGL/Src/RasterFont.cpp @@ -127,7 +127,7 @@ static const char *s_fragmentShaderSrc = "uniform sampler2D samp8;\n" "uniform vec4 color;\n" "VARYIN vec2 
uv0;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "void main(void) {\n" " ocol0 = texture(samp8,uv0) * color;\n" "}\n"; diff --git a/Source/Core/VideoBackends/OGL/Src/Render.cpp b/Source/Core/VideoBackends/OGL/Src/Render.cpp index 8f95f5575e..6dd86866fd 100644 --- a/Source/Core/VideoBackends/OGL/Src/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Src/Render.cpp @@ -665,7 +665,7 @@ void Renderer::Init() " c = vec4(color0, 1.0);\n" "}\n", "VARYIN vec4 c;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "void main(void) {\n" " ocol0 = c;\n" "}\n"); diff --git a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp index eb2198ee7f..aa197c293c 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp @@ -406,7 +406,7 @@ TextureCache::TextureCache() "uniform sampler2DRect samp9;\n" "uniform vec4 colmat[7];\n" "VARYIN vec2 uv0;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "\n" "void main(){\n" " vec4 texcol = texture2DRect(samp9, uv0);\n" @@ -418,7 +418,7 @@ TextureCache::TextureCache() "uniform sampler2DRect samp9;\n" "uniform vec4 colmat[5];\n" "VARYIN vec2 uv0;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "\n" "void main(){\n" " vec4 texcol = texture2DRect(samp9, uv0);\n" diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 56c1f2e484..f906d2219e 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -77,7 +77,7 @@ void CreatePrograms() const char *FProgramRgbToYuyv = "uniform sampler2DRect samp9;\n" "VARYIN vec2 uv0;\n" - "COLOROUT(ocol0)\n" + "out vec4 ocol0;\n" "void main()\n" "{\n" " vec3 c0 = texture2DRect(samp9, uv0 - dFdx(uv0) * 0.25).rgb;\n" @@ -106,7 +106,7 @@ void CreatePrograms() const char *FProgramYuyvToRgb = "uniform sampler2DRect samp9;\n" "VARYIN vec2 uv0;\n" - "COLOROUT(ocol0)\n" + 
"out vec4 ocol0;\n" "void main()\n" "{\n" " ivec2 uv = ivec2(gl_FragCoord.xy);\n" @@ -118,7 +118,7 @@ void CreatePrograms() " ivec2 ts = textureSize(samp9);\n" " vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1));\n" #endif - " float y = mix(c0.b, c0.r, uv.x & 1);\n" + " float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n" " float yComp = 1.164 * (y - 0.0625);\n" " float uComp = c0.g - 0.5;\n" " float vComp = c0.a - 0.5;\n" diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index f107da783e..26e5cc0eba 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -304,7 +304,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (ApiType == API_OPENGL) { - out.Write("COLOROUT(ocol0)\n"); + out.Write("out vec4 ocol0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) out.Write("out vec4 ocol1;\n"); From 7ed8e6a29c5f8a0f619104bc68b8854feda98fac Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 24 Nov 2013 22:45:48 -0600 Subject: [PATCH 2/6] [Android] Fix the check for the Qualcomm graphics driver version for v53 drivers with the screen being rotated 90 degrees. Initialize the OpenGL information grabbing only once. Check for v14 Qualcomm drivers and spit out an error if the user tries selecting OpenGL ES 3. --- Source/Android/res/values-ja/strings.xml | 1 + Source/Android/res/values/strings.xml | 1 + .../settings/VideoSettingsFragment.java | 56 +++++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/Source/Android/res/values-ja/strings.xml b/Source/Android/res/values-ja/strings.xml index 77db4fee79..db9c1854fd 100644 --- a/Source/Android/res/values-ja/strings.xml +++ b/Source/Android/res/values-ja/strings.xml @@ -29,6 +29,7 @@ デバイスの互換性の警告 この電話は、NEON拡張をサポートしていません。 おそらくDolphinを実行することはできません。\nあなたはとにかくそれを実行してみますか? + デバイスはOpenGLES3のビデオドライバのバグがあります。\nあなたはとにかくそれを使用してみたいのですか? 
クリックされたファイル: %1$s diff --git a/Source/Android/res/values/strings.xml b/Source/Android/res/values/strings.xml index ca7157fd44..7ffc2bcfd2 100644 --- a/Source/Android/res/values/strings.xml +++ b/Source/Android/res/values/strings.xml @@ -29,6 +29,7 @@ Device Compatibility Warning Your phone doesn\'t support NEON which makes it incapable of running Dolphin Mobile?\nDo you want to try anyway? + Your device has known buggy video drivers for OpenGL ES 3.\nDo you want to try anyway? File clicked: %1$s diff --git a/Source/Android/src/org/dolphinemu/dolphinemu/settings/VideoSettingsFragment.java b/Source/Android/src/org/dolphinemu/dolphinemu/settings/VideoSettingsFragment.java index e181fc32bc..81d4d2dad7 100644 --- a/Source/Android/src/org/dolphinemu/dolphinemu/settings/VideoSettingsFragment.java +++ b/Source/Android/src/org/dolphinemu/dolphinemu/settings/VideoSettingsFragment.java @@ -7,6 +7,8 @@ package org.dolphinemu.dolphinemu.settings; import android.app.Activity; +import android.app.AlertDialog; +import android.content.DialogInterface; import android.content.SharedPreferences; import android.content.SharedPreferences.OnSharedPreferenceChangeListener; import android.os.Bundle; @@ -29,6 +31,7 @@ public final class VideoSettingsFragment extends PreferenceFragment public static String m_GLRenderer; public static String m_GLExtensions; public static float m_QualcommVersion; + public static boolean m_Inited = false; private Activity m_activity; /** @@ -147,20 +150,24 @@ public final class VideoSettingsFragment extends PreferenceFragment */ public static boolean SupportsGLES3() { - VersionCheck mbuffer = new VersionCheck(); - m_GLVersion = mbuffer.getVersion(); - m_GLVendor = mbuffer.getVendor(); - m_GLRenderer = mbuffer.getRenderer(); - m_GLExtensions = mbuffer.getExtensions(); - boolean mSupportsGLES3 = false; + if (!m_Inited) + { + VersionCheck mbuffer = new VersionCheck(); + m_GLVersion = mbuffer.getVersion(); + m_GLVendor = mbuffer.getVendor(); + m_GLRenderer = 
mbuffer.getRenderer(); + m_GLExtensions = mbuffer.getExtensions(); + m_Inited = true; + } + // Check for OpenGL ES 3 support (General case). if (m_GLVersion != null && m_GLVersion.contains("OpenGL ES 3.0")) mSupportsGLES3 = true; // Checking for OpenGL ES 3 support for certain Qualcomm devices. - if (!mSupportsGLES3 && m_GLVendor != null && m_GLVendor.equals("Qualcomm")) + if (m_GLVendor != null && m_GLVendor.equals("Qualcomm")) { if (m_GLRenderer.contains("Adreno (TM) 3")) { @@ -182,6 +189,7 @@ public final class VideoSettingsFragment extends PreferenceFragment mSupportsGLES3 = true; } } + return mSupportsGLES3; } @@ -253,10 +261,36 @@ public final class VideoSettingsFragment extends PreferenceFragment } else if (preference.getString(key, "Software Renderer").equals("OGL")) { - mainScreen.getPreference(0).setEnabled(true); - mainScreen.getPreference(1).setEnabled(true); - mainScreen.getPreference(3).setEnabled(true); - //mainScreen.getPreference(4).setEnabled(false); + // Create an alert telling them that their phone sucks + if (VideoSettingsFragment.SupportsGLES3() + && VideoSettingsFragment.m_GLVendor != null + && VideoSettingsFragment.m_GLVendor.equals("Qualcomm") + && VideoSettingsFragment.m_QualcommVersion == 14.0f) + { + AlertDialog.Builder builder = new AlertDialog.Builder(m_activity); + builder.setTitle(R.string.device_compat_warning); + builder.setMessage(R.string.device_gles3compat_warning_msg); + builder.setPositiveButton(R.string.yes, new DialogInterface.OnClickListener() { + public void onClick(DialogInterface dialog, int which) { + mainScreen.getPreference(0).setEnabled(true); + mainScreen.getPreference(1).setEnabled(true); + mainScreen.getPreference(3).setEnabled(true); + //mainScreen.getPreference(4).setEnabled(false); + } + }); + builder.setNegativeButton(R.string.no, new DialogInterface.OnClickListener() { + public void onClick(DialogInterface dialog, int which) + { + // Get an editor. 
+ SharedPreferences.Editor editor = sPrefs.edit(); + editor.putString("gpuPref", "Software Renderer"); + editor.commit(); + videoBackends.setValue("Software Renderer"); + videoBackends.setSummary("Software Renderer"); + } + }); + builder.show(); + } } } } From 230e12ae8c022a8e24ecea3019623a7c0ebcd3c7 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 07:38:20 +0100 Subject: [PATCH 3/6] OpenGL: also remove VAO from xfb conversion We use attributeless rendering, so officially we have to bind _any_ VAO. As the state of this VAO doesn't matter, we don't have to switch it. Also fix an AMD issue as they don't like to render from an empty VAO. --- Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index f906d2219e..f653dfbf01 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -42,7 +42,6 @@ static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS]; static GLuint s_encode_VBO = 0; static GLuint s_encode_VAO = 0; -static GLuint s_decode_VAO = 0; static TargetRectangle s_cached_sourceRc; static const char *VProgram = @@ -177,9 +176,6 @@ void Init() s_cached_sourceRc.left = -1; s_cached_sourceRc.right = -1; - glGenVertexArrays(1, &s_decode_VAO ); - glBindVertexArray( s_decode_VAO ); - glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); glBindTexture(getFbType(), s_srcTexture); @@ -200,7 +196,6 @@ void Shutdown() glDeleteFramebuffers(1, &s_texConvFrameBuffer); glDeleteBuffers(1, &s_encode_VBO ); glDeleteVertexArrays(1, &s_encode_VAO ); - glDeleteVertexArrays(1, &s_decode_VAO ); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -405,7 +400,6 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur glViewport(0, 0, srcWidth, srcHeight); s_yuyvToRgbProgram.Bind(); - 
glBindVertexArray( s_decode_VAO ); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); FramebufferManager::SetFramebuffer(0); From db9c586356d402d3bf8ce70ece54526f8fcd9fc3 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 25 Nov 2013 16:56:04 -0600 Subject: [PATCH 4/6] Revert "jit: change our linking module to be able to handle arbitrary exit addresses" This shouldn't cause issues, but does in Windows. Revert for now. This reverts commit 1aa06b8fa4105d22cac0dd847230215099bbeb78. --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 15 ++++------ Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 +- .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 10 +++---- .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 10 +++---- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 13 ++++----- .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 18 +++++------- Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h | 4 +-- Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp | 13 ++++----- Source/Core/Core/Src/PowerPC/JitArm32/Jit.h | 2 +- .../Src/PowerPC/JitArm32/JitArm_Branch.cpp | 10 +++---- .../Src/PowerPC/JitArm32/JitArm_LoadStore.cpp | 2 +- .../JitArm32/JitArm_SystemRegisters.cpp | 2 +- .../Core/Core/Src/PowerPC/JitArmIL/IR_Arm.cpp | 11 ++++--- .../Core/Core/Src/PowerPC/JitArmIL/JitIL.cpp | 15 ++++------ Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.h | 4 +-- .../Core/Src/PowerPC/JitCommon/JitCache.cpp | 29 ++++++++++++------- .../Core/Src/PowerPC/JitCommon/JitCache.h | 11 +++---- 19 files changed, 83 insertions(+), 92 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 87fb248c1f..394c7bc86a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -276,7 +276,7 @@ void Jit64::Cleanup() ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, 
jit->js.numFloatingPointInst); } -void Jit64::WriteExit(u32 destination) +void Jit64::WriteExit(u32 destination, int exit_num) { Cleanup(); @@ -284,9 +284,8 @@ void Jit64::WriteExit(u32 destination) //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); // Link opportunity! if (jo.enableBlocklink) @@ -296,14 +295,12 @@ void Jit64::WriteExit(u32 destination) { // It exists! Joy of joy! JMP(blocks.GetBlock(block)->checkedEntry, true); - linkData.linkStatus = true; + b->linkStatus[exit_num] = true; return; } } MOV(32, M(&PC), Imm32(destination)); JMP(asm_routines.dispatcher, true); - - b->linkData.push_back(linkData); } void Jit64::WriteExitDestInEAX() @@ -628,7 +625,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - WriteExit(ops[i].address); + WriteExit(ops[i].address, 0); SetJumpTarget(noBreakpoint); } @@ -710,7 +707,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); - WriteExit(nextPC); + WriteExit(nextPC, 0); } b->flags = js.block_flags; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 1adbdfaac4..139414a103 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -99,7 +99,7 @@ public: // Utilities for use by opcodes - void WriteExit(u32 destination); + void WriteExit(u32 destination, int exit_num); void WriteExitDestInEAX(); void WriteExceptionExit(); void WriteExternalExceptionExit(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp 
b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index a1e28ac6ea..b0f3f1cd18 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -91,7 +91,7 @@ void Jit64::bx(UGeckoInstruction inst) // make idle loops go faster js.downcountAmount += 8; } - WriteExit(destination); + WriteExit(destination, 0); } // TODO - optimize to hell and beyond @@ -136,13 +136,13 @@ void Jit64::bcx(UGeckoInstruction inst) destination = SignExt16(inst.BD << 2); else destination = js.compilerPC + SignExt16(inst.BD << 2); - WriteExit(destination); + WriteExit(destination, 0); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } void Jit64::bcctrx(UGeckoInstruction inst) @@ -190,7 +190,7 @@ void Jit64::bcctrx(UGeckoInstruction inst) WriteExitDestInEAX(); // Would really like to continue the block here, but it ends. TODO. 
SetJumpTarget(b); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } } @@ -245,5 +245,5 @@ void Jit64::bclrx(UGeckoInstruction inst) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 8299f2b044..244a051aaa 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -400,7 +400,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) destination = SignExt16(js.next_inst.BD << 2); else destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); + WriteExit(destination, 0); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { @@ -424,7 +424,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) } else { - WriteExit(js.next_compilerPC + 4); + WriteExit(js.next_compilerPC + 4, 0); } js.cancel = true; @@ -507,7 +507,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) destination = SignExt16(js.next_inst.BD << 2); else destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); + WriteExit(destination, 0); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { @@ -534,7 +534,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); - WriteExit(js.next_compilerPC + 4); + WriteExit(js.next_compilerPC + 4, 1); js.cancel = true; } @@ -2221,5 +2221,5 @@ void Jit64::twx(UGeckoInstruction inst) SetJumpTarget(exit3); SetJumpTarget(exit4); SetJumpTarget(exit5); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp 
b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index 1b41c2f1e9..a1c3bd971c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -480,5 +480,5 @@ void Jit64::stmw(UGeckoInstruction inst) void Jit64::icbi(UGeckoInstruction inst) { Default(inst); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 0); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 8282758a74..573feb3756 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -137,7 +137,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) SetJumpTarget(noExceptionsPending); SetJumpTarget(eeDisabled); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 0); js.firstFPInstructionFound = false; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index ef1db98de1..7a37a1cb43 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -552,8 +552,7 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { static void regWriteExit(RegInfo& RI, InstLoc dest) { if (isImm(*dest)) { - RI.exitNumber++; - RI.Jit->WriteExit(RI.Build->GetImmValue(dest)); + RI.Jit->WriteExit(RI.Build->GetImmValue(dest), RI.exitNumber++); } else { RI.Jit->WriteExitDestInOpArg(regLocForInst(RI, dest)); } @@ -565,7 +564,7 @@ static bool checkIsSNAN() { return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]); } -static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { +static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit) { //printf("Writing block: %x\n", js.blockStart); RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); RI.Build = ibuild; @@ -1792,7 +1791,7 @@ static void DoWriteCode(IRBuilder* 
ibuild, JitIL* Jit, u32 exitAddress) { Jit->ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = Jit->J_CC(CC_Z); - Jit->WriteExit(InstLoc); + Jit->WriteExit(InstLoc, 0); Jit->SetJumpTarget(noBreakpoint); break; } @@ -1820,10 +1819,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } } - Jit->WriteExit(exitAddress); + Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->UD2(); } -void JitIL::WriteCode(u32 exitAddress) { - DoWriteCode(&ibuild, this, exitAddress); +void JitIL::WriteCode() { + DoWriteCode(&ibuild, this); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 1518bc57bb..b15ffbacca 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -381,7 +381,7 @@ void JitIL::Cleanup() ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst); } -void JitIL::WriteExit(u32 destination) +void JitIL::WriteExit(u32 destination, int exit_num) { Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { @@ -391,9 +391,8 @@ void JitIL::WriteExit(u32 destination) //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); // Link opportunity! int block = blocks.GetBlockNumberFromStartAddress(destination); @@ -401,14 +400,13 @@ void JitIL::WriteExit(u32 destination) { // It exists! Joy of joy! 
JMP(blocks.GetBlock(block)->checkedEntry, true); - linkData.linkStatus = true; + b->linkStatus[exit_num] = true; } else { MOV(32, M(&PC), Imm32(destination)); JMP(asm_routines.dispatcher, true); } - b->linkData.push_back(linkData); } void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) @@ -543,16 +541,14 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Analyze the block, collect all instructions it is made of (including inlining, // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - u32 exitAddress = em_address; - + b->exitAddress[0] = em_address; u32 merged_addresses[32]; const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); int size_of_merged_addresses = 0; if (!memory_exception) { // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. - // TODO - exitAddress = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); + b->exitAddress[0] = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); } PPCAnalyst::CodeOp *ops = code_buf->codebuffer; @@ -711,7 +707,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } // Perform actual code generation - WriteCode(exitAddress); + WriteCode(); b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index 305a96015f..e56a56c815 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -105,7 +105,7 @@ public: // Utilities for use by opcodes - void WriteExit(u32 destination); + void WriteExit(u32 
destination, int exit_num); void WriteExitDestInOpArg(const Gen::OpArg& arg); void WriteExceptionExit(); void WriteRfiExitDestInOpArg(const Gen::OpArg& arg); @@ -121,7 +121,7 @@ public: void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); - void WriteCode(u32 exitAddress); + void WriteCode(); // OPCODES void unknown_instruction(UGeckoInstruction _inst) override; diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp index eab7f3711a..4083a383e9 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp @@ -186,16 +186,15 @@ void JitArm::WriteExceptionExit() MOVI2R(A, (u32)asm_routines.testExceptions); B(A); } -void JitArm::WriteExit(u32 destination) +void JitArm::WriteExit(u32 destination, int exit_num) { Cleanup(); DoDownCount(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); // Link opportunity! int block = blocks.GetBlockNumberFromStartAddress(destination); @@ -203,7 +202,7 @@ void JitArm::WriteExit(u32 destination) { // It exists! Joy of joy! 
B(blocks.GetBlock(block)->checkedEntry); - linkData.linkStatus = true; + b->linkStatus[exit_num] = true; } else { @@ -213,8 +212,6 @@ void JitArm::WriteExit(u32 destination) MOVI2R(A, (u32)asm_routines.dispatcher); B(A); } - - b->linkData.push_back(linkData); } void STACKALIGN JitArm::Run() @@ -499,7 +496,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo if (broken_block) { printf("Broken Block going to 0x%08x\n", nextPC); - WriteExit(nextPC); + WriteExit(nextPC, 0); } b->flags = js.block_flags; diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index 4d1d2d8a4e..fc1911c5bf 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -109,7 +109,7 @@ public: // Utilities for use by opcodes - void WriteExit(u32 destination); + void WriteExit(u32 destination, int exit_num); void WriteExitDestInR(ARMReg Reg); void WriteRfiExitDestInR(ARMReg Reg); void WriteExceptionExit(); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp index eac488a5b0..4e9101f7b2 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp @@ -154,7 +154,7 @@ void JitArm::bx(UGeckoInstruction inst) MOVI2R(R14, (u32)asm_routines.testExceptions); B(R14); } - WriteExit(destination); + WriteExit(destination, 0); } void JitArm::bcx(UGeckoInstruction inst) @@ -209,14 +209,14 @@ void JitArm::bcx(UGeckoInstruction inst) destination = SignExt16(inst.BD << 2); else destination = js.compilerPC + SignExt16(inst.BD << 2); - WriteExit(destination); + WriteExit(destination, 0); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } void 
JitArm::bcctrx(UGeckoInstruction inst) { @@ -278,7 +278,7 @@ void JitArm::bcctrx(UGeckoInstruction inst) WriteExitDestInR(rA); SetJumpTarget(b); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } } void JitArm::bclrx(UGeckoInstruction inst) @@ -355,5 +355,5 @@ void JitArm::bclrx(UGeckoInstruction inst) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 1); } diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp index 3983772aa6..f04c88d6c3 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -531,6 +531,6 @@ void JitArm::dcbst(UGeckoInstruction inst) void JitArm::icbi(UGeckoInstruction inst) { Default(inst); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 0); } diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index 62b15e1d5e..e4ab630fce 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -205,7 +205,7 @@ void JitArm::mtmsr(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - WriteExit(js.compilerPC + 4); + WriteExit(js.compilerPC + 4, 0); } void JitArm::mfmsr(UGeckoInstruction inst) diff --git a/Source/Core/Core/Src/PowerPC/JitArmIL/IR_Arm.cpp b/Source/Core/Core/Src/PowerPC/JitArmIL/IR_Arm.cpp index c1bee507fc..10620d9183 100644 --- a/Source/Core/Core/Src/PowerPC/JitArmIL/IR_Arm.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArmIL/IR_Arm.cpp @@ -156,8 +156,7 @@ static ARMReg regEnsureInReg(RegInfo& RI, InstLoc I) { static void regWriteExit(RegInfo& RI, InstLoc dest) { if (isImm(*dest)) { - RI.exitNumber++; - RI.Jit->WriteExit(RI.Build->GetImmValue(dest)); 
+ RI.Jit->WriteExit(RI.Build->GetImmValue(dest), RI.exitNumber++); } else { RI.Jit->WriteExitDestInReg(regLocForInst(RI, dest)); } @@ -282,7 +281,7 @@ static void regEmitCmp(RegInfo& RI, InstLoc I) { } } -static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) { +static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit) { RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); RI.Build = ibuild; @@ -734,10 +733,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) { } } - Jit->WriteExit(exitAddress); + Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->BKPT(0x111); } -void JitArmIL::WriteCode(u32 exitAddress) { - DoWriteCode(&ibuild, this, exitAddress); +void JitArmIL::WriteCode() { + DoWriteCode(&ibuild, this); } diff --git a/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.cpp index 44ebc8b6eb..0aac4d6722 100644 --- a/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.cpp @@ -117,14 +117,13 @@ void JitArmIL::WriteExceptionExit() MOVI2R(R14, (u32)asm_routines.testExceptions); B(R14); } -void JitArmIL::WriteExit(u32 destination) +void JitArmIL::WriteExit(u32 destination, int exit_num) { DoDownCount(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); // Link opportunity! int block = blocks.GetBlockNumberFromStartAddress(destination); @@ -132,7 +131,7 @@ void JitArmIL::WriteExit(u32 destination) { // It exists! Joy of joy! 
B(blocks.GetBlock(block)->checkedEntry); - linkData.linkStatus = true; + b->linkStatus[exit_num] = true; } else { @@ -141,8 +140,6 @@ void JitArmIL::WriteExit(u32 destination) MOVI2R(R14, (u32)asm_routines.dispatcher); B(R14); } - - b->linkData.push_back(linkData); } void JitArmIL::PrintDebug(UGeckoInstruction inst, u32 level) { @@ -350,12 +347,12 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB if (broken_block) { printf("Broken Block going to 0x%08x\n", nextPC); - WriteExit(nextPC); + WriteExit(nextPC, 0); } // Perform actual code generation - WriteCode(nextPC); + WriteCode(); b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; diff --git a/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.h b/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.h index 71b09a3251..4dec87ddeb 100644 --- a/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/JitArmIL/JitIL.h @@ -64,8 +64,8 @@ public: void Run(); void SingleStep(); // - void WriteCode(u32 exitAddress); - void WriteExit(u32 destination); + void WriteCode(); + void WriteExit(u32 destination, int exit_num); void WriteExitDestInReg(ARMReg Reg); void WriteRfiExitDestInR(ARMReg Reg); void WriteExceptionExit(); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp index b81dadd394..d7c78d9d17 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp @@ -35,6 +35,8 @@ op_agent_t agent; using namespace Gen; +#define INVALID_EXIT 0xFFFFFFFF + bool JitBaseBlockCache::IsFull() const { return GetNumBlocks() >= MAX_NUM_BLOCKS - 1; @@ -165,6 +167,12 @@ using namespace Gen; JitBlock &b = blocks[num_blocks]; b.invalid = false; b.originalAddress = em_address; + b.exitAddress[0] = INVALID_EXIT; + b.exitAddress[1] = INVALID_EXIT; + b.exitPtrs[0] = 0; + b.exitPtrs[1] = 0; + b.linkStatus[0] = false; + 
b.linkStatus[1] = false; num_blocks++; //commit the current block return num_blocks - 1; } @@ -185,9 +193,10 @@ using namespace Gen; block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num; if (block_link) { - for (const auto& e : b.linkData) + for (int i = 0; i < 2; i++) { - links_to.insert(std::pair(e.exitAddress, block_num)); + if (b.exitAddress[i] != INVALID_EXIT) + links_to.insert(std::pair(b.exitAddress[i], block_num)); } LinkBlock(block_num); @@ -266,15 +275,15 @@ using namespace Gen; // This block is dead. Don't relink it. return; } - for (auto& e : b.linkData) + for (int e = 0; e < 2; e++) { - if (!e.linkStatus) + if (b.exitAddress[e] != INVALID_EXIT && !b.linkStatus[e]) { - int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress); + int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]); if (destinationBlock != -1) { - WriteLinkBlock(e.exitPtrs, blocks[destinationBlock].checkedEntry); - e.linkStatus = true; + WriteLinkBlock(b.exitPtrs[e], blocks[destinationBlock].checkedEntry); + b.linkStatus[e] = true; } } } @@ -307,10 +316,10 @@ using namespace Gen; return; for (multimap::iterator iter = ppp.first; iter != ppp.second; ++iter) { JitBlock &sourceBlock = blocks[iter->second]; - for (auto& e : sourceBlock.linkData) + for (int e = 0; e < 2; e++) { - if (e.exitAddress == b.originalAddress) - e.linkStatus = false; + if (sourceBlock.exitAddress[e] == b.originalAddress) + sourceBlock.linkStatus[e] = false; } } } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h index ffbffaadb5..b81c5d837a 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h @@ -35,6 +35,9 @@ struct JitBlock const u8 *checkedEntry; const u8 *normalEntry; + u8 *exitPtrs[2]; // to be able to rewrite the exit jum + u32 exitAddress[2]; // 0xFFFFFFFF == unknown + u32 originalAddress; u32 codeSize; u32 originalSize; @@ 
-42,13 +45,7 @@ struct JitBlock int flags; bool invalid; - - struct LinkData { - u8 *exitPtrs; // to be able to rewrite the exit jum - u32 exitAddress; - bool linkStatus; // is it already linked? - }; - std::vector linkData; + bool linkStatus[2]; #ifdef _WIN32 // we don't really need to save start and stop From 95aeedec19db25f7de3bd37cc7281f6f70e0ab58 Mon Sep 17 00:00:00 2001 From: degasus Date: Tue, 26 Nov 2013 20:05:49 +0100 Subject: [PATCH 5/6] OpenGL: readback efb2ram with different strides at once This is done with a pixel buffer object. We still have to stall the GPU, but we only do it once per efb2ram call. As the cpu can't access the vram, it has to queue a memcpy for the gpu and wait for the gpu to finish this copy. We did this for every cache line which is just stupid. Now we copy the complete texture into a pbo and readback this at once. So we don't have to wait for lots of round-trip-times. --- .../OGL/Src/TextureConverter.cpp | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index f653dfbf01..290dc510cf 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -44,6 +44,8 @@ static GLuint s_encode_VBO = 0; static GLuint s_encode_VAO = 0; static TargetRectangle s_cached_sourceRc; +static GLuint s_PBO = 0; // for readback with different strides + static const char *VProgram = "ATTRIN vec2 rawpos;\n" "ATTRIN vec2 tex0;\n" @@ -186,6 +188,8 @@ void Init() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glGenBuffers(1, &s_PBO); + CreatePrograms(); } @@ -196,6 +200,7 @@ void Shutdown() glDeleteFramebuffers(1, &s_texConvFrameBuffer); glDeleteBuffers(1, &s_encode_VBO ); glDeleteVertexArrays(1, &s_encode_VAO ); + 
glDeleteBuffers(1, &s_PBO); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -206,6 +211,7 @@ void Shutdown() s_srcTexture = 0; s_dstTexture = 0; s_texConvFrameBuffer = 0; + s_PBO = 0; } void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, @@ -267,25 +273,37 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, // TODO: make this less slow. int writeStride = bpmem.copyMipMapStrideChannels * 32; + int dstSize = dstWidth*dstHeight*4; + int readHeight = readStride / dstWidth / 4; // 4 bytes per pixel + int readLoops = dstHeight / readHeight; - if (writeStride != readStride && toTexture) + if (writeStride != readStride && readLoops > 1 && toTexture) { // writing to a texture of a different size + // also copy more than one block line, so the different strides matter + // copy into one pbo first, map this buffer, and then memcpy into gc memory + // in this way, we only have one vram->ram transfer, but maybe a bigger + // cpu overhead because of the pbo + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO); + glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, NULL, GL_STREAM_READ); + glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT); - int readHeight = readStride / dstWidth; - readHeight /= 4; // 4 bytes per pixel - - int readStart = 0; - int readLoops = dstHeight / readHeight; + //int readStart = 0; for (int i = 0; i < readLoops; i++) { - glReadPixels(0, readStart, (GLsizei)dstWidth, (GLsizei)readHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); - readStart += readHeight; + memcpy(destAddr, pbo, readStride); + pbo += readStride; destAddr += writeStride; } + + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); } else + { glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); + } GL_REPORT_ERRORD(); From
69137cff4c58fbdfcb2b67ad51d2d68cc8ca0cbc Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 29 Nov 2013 06:09:54 +0100 Subject: [PATCH 6/6] Merge X11+D3D FreeLook feature into DolphinWX This removes the redundant code and also implements this feature for OSX and Wayland. However, this means it is dropped for non-wx builds. IMO DolphinWX still isn't the best place for this, but now it's in the same file as all the other hotkeys. Maybe they'll all be moved to InputCommon at some point. --- Source/Core/DolphinWX/Src/Frame.cpp | 53 +++++++++++++++++ .../DolphinWX/Src/GLInterface/X11_Util.cpp | 57 ------------------- Source/Core/VideoCommon/Src/EmuWindow.cpp | 53 ----------------- 3 files changed, 53 insertions(+), 110 deletions(-) diff --git a/Source/Core/DolphinWX/Src/Frame.cpp b/Source/Core/DolphinWX/Src/Frame.cpp index 1ca16ff1ac..5a255567d5 100644 --- a/Source/Core/DolphinWX/Src/Frame.cpp +++ b/Source/Core/DolphinWX/Src/Frame.cpp @@ -1028,6 +1028,59 @@ void CFrame::OnMouse(wxMouseEvent& event) event.GetPosition().x, event.GetPosition().y, event.ButtonDown()); } #endif + + // next handlers are all for FreeLook, so we don't need to check them if disabled + if(!g_Config.bFreeLook) + { + event.Skip(); + return; + } + + // Free look variables + static bool mouseLookEnabled = false; + static bool mouseMoveEnabled = false; + static float lastMouse[2]; + + if(event.MiddleDown()) + { + lastMouse[0] = event.GetX(); + lastMouse[1] = event.GetY(); + mouseMoveEnabled = true; + } + else if(event.RightDown()) + { + lastMouse[0] = event.GetX(); + lastMouse[1] = event.GetY(); + mouseLookEnabled = true; + } + else if(event.MiddleUp()) + { + mouseMoveEnabled = false; + } + else if(event.RightUp()) + { + mouseLookEnabled = false; + } + // no button, so it's a move event + else if(event.GetButton() == wxMOUSE_BTN_NONE) + { + if (mouseLookEnabled) + { + VertexShaderManager::RotateView((event.GetX() - lastMouse[0]) / 200.0f, + (event.GetY() - lastMouse[1]) / 200.0f); + lastMouse[0] = event.GetX(); +
lastMouse[1] = event.GetY(); + } + + if (mouseMoveEnabled) + { + VertexShaderManager::TranslateView((event.GetX() - lastMouse[0]) / 50.0f, + (event.GetY() - lastMouse[1]) / 50.0f); + lastMouse[0] = event.GetX(); + lastMouse[1] = event.GetY(); + } + } + event.Skip(); } diff --git a/Source/Core/DolphinWX/Src/GLInterface/X11_Util.cpp b/Source/Core/DolphinWX/Src/GLInterface/X11_Util.cpp index e57c095d86..e3ea3f3c90 100644 --- a/Source/Core/DolphinWX/Src/GLInterface/X11_Util.cpp +++ b/Source/Core/DolphinWX/Src/GLInterface/X11_Util.cpp @@ -18,7 +18,6 @@ #include "Host.h" #include "VideoConfig.h" #include "../GLInterface.h" -#include "VertexShaderManager.h" #if USE_EGL bool cXInterface::ServerConnect(void) @@ -166,10 +165,6 @@ void cX11Window::DestroyXWindow(void) void cX11Window::XEventThread() #endif { - // Free look variables - static bool mouseLookEnabled = false; - static bool mouseMoveEnabled = false; - static float lastMouse[2]; while (GLWin.win) { XEvent event; @@ -177,58 +172,6 @@ void cX11Window::XEventThread() { XNextEvent(GLWin.evdpy, &event); switch(event.type) { - case ButtonPress: - if (g_Config.bFreeLook) - { - switch (event.xbutton.button) - { - case 2: // Middle button - lastMouse[0] = event.xbutton.x; - lastMouse[1] = event.xbutton.y; - mouseMoveEnabled = true; - break; - case 3: // Right button - lastMouse[0] = event.xbutton.x; - lastMouse[1] = event.xbutton.y; - mouseLookEnabled = true; - break; - } - } - break; - case ButtonRelease: - if (g_Config.bFreeLook) - { - switch (event.xbutton.button) - { - case 2: // Middle button - mouseMoveEnabled = false; - break; - case 3: // Right button - mouseLookEnabled = false; - break; - } - } - break; - case MotionNotify: - if (g_Config.bFreeLook) - { - if (mouseLookEnabled) - { - VertexShaderManager::RotateView((event.xmotion.x - lastMouse[0]) / 200.0f, - (event.xmotion.y - lastMouse[1]) / 200.0f); - lastMouse[0] = event.xmotion.x; - lastMouse[1] = event.xmotion.y; - } - - if (mouseMoveEnabled) - { - 
VertexShaderManager::TranslateView((event.xmotion.x - lastMouse[0]) / 50.0f, - (event.xmotion.y - lastMouse[1]) / 50.0f); - lastMouse[0] = event.xmotion.x; - lastMouse[1] = event.xmotion.y; - } - } - break; case ConfigureNotify: GLInterface->SetBackBufferDimensions(event.xconfigure.width, event.xconfigure.height); break; diff --git a/Source/Core/VideoCommon/Src/EmuWindow.cpp b/Source/Core/VideoCommon/Src/EmuWindow.cpp index 9b8cde5178..7bb675789e 100644 --- a/Source/Core/VideoCommon/Src/EmuWindow.cpp +++ b/Source/Core/VideoCommon/Src/EmuWindow.cpp @@ -7,7 +7,6 @@ #include "VideoConfig.h" #include "EmuWindow.h" #include "Fifo.h" -#include "VertexShaderManager.h" #include "VideoBackendBase.h" #include "Core.h" #include "Host.h" @@ -41,60 +40,8 @@ HWND GetParentWnd() return m_hParent; } -void FreeLookInput( UINT iMsg, WPARAM wParam ) -{ - static bool mouseLookEnabled = false; - static bool mouseMoveEnabled = false; - static float lastMouse[2]; - POINT point; - - switch(iMsg) - { - case WM_MOUSEMOVE: - if (mouseLookEnabled) - { - GetCursorPos(&point); - VertexShaderManager::RotateView((point.x - lastMouse[0]) / 200.0f, (point.y - lastMouse[1]) / 200.0f); - lastMouse[0] = (float)point.x; - lastMouse[1] = (float)point.y; - } - - if (mouseMoveEnabled) - { - GetCursorPos(&point); - VertexShaderManager::TranslateView((point.x - lastMouse[0]) / 50.0f, (point.y - lastMouse[1]) / 50.0f); - lastMouse[0] = (float)point.x; - lastMouse[1] = (float)point.y; - } - break; - - case WM_RBUTTONDOWN: - GetCursorPos(&point); - lastMouse[0] = (float)point.x; - lastMouse[1] = (float)point.y; - mouseLookEnabled= true; - break; - case WM_MBUTTONDOWN: - GetCursorPos(&point); - lastMouse[0] = (float)point.x; - lastMouse[1] = (float)point.y; - mouseMoveEnabled= true; - break; - case WM_RBUTTONUP: - mouseLookEnabled = false; - break; - case WM_MBUTTONUP: - mouseMoveEnabled = false; - break; - } -} - - LRESULT CALLBACK WndProc( HWND hWnd, UINT iMsg, WPARAM wParam, LPARAM lParam ) { - if 
(g_ActiveConfig.bFreeLook) - FreeLookInput( iMsg, wParam ); - switch( iMsg ) { case WM_PAINT: