From 29808cdde2aa918f453d9d466e238f24a1f2d62f Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sat, 26 Sep 2009 12:39:12 +0000 Subject: [PATCH] OpenGL: commit rodolfoosvaldobogado's (what a name!) speedup patches. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4322 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Core/VideoCommon/Src/IndexGenerator.cpp | 7 + Source/Core/VideoCommon/Src/IndexGenerator.h | 2 + Source/Core/VideoCommon/Src/VertexLoader.cpp | 3 +- .../VideoCommon/Src/VertexShaderManager.cpp | 19 +- .../Src/FramebufferManager.cpp | 8 +- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 49 +++- .../Plugin_VideoOGL/Src/PixelShaderCache.h | 10 + .../Plugin_VideoOGL/Src/PostProcessing.cpp | 6 +- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 24 +- .../Plugin_VideoOGL/Src/TextureConverter.cpp | 38 ++-- .../Plugin_VideoOGL/Src/TextureMngr.cpp | 7 +- .../Plugin_VideoOGL/Src/VertexManager.cpp | 211 ++++++++++-------- .../Plugin_VideoOGL/Src/VertexManager.h | 2 +- .../Plugin_VideoOGL/Src/VertexShaderCache.cpp | 50 ++++- .../Plugin_VideoOGL/Src/VertexShaderCache.h | 8 + Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 1 + 16 files changed, 295 insertions(+), 150 deletions(-) diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.cpp b/Source/Core/VideoCommon/Src/IndexGenerator.cpp index b2dd3efd49..407fbe6579 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/Src/IndexGenerator.cpp @@ -32,6 +32,7 @@ void IndexGenerator::Start(unsigned short *startptr) index = 0; numPrims = 0; adds = 0; + indexLen = 0; onlyLists = true; } @@ -45,6 +46,7 @@ void IndexGenerator::AddList(int numVerts) *ptr++ = index+i*3+1; *ptr++ = index+i*3+2; } + indexLen += numVerts; index += numVerts; numPrims += numTris; adds++; @@ -62,6 +64,7 @@ void IndexGenerator::AddStrip(int numVerts) *ptr++ = index+i+(wind?1:2); wind = !wind; } + indexLen += numTris * 3; index += numVerts; numPrims += numTris; adds++; @@ -77,6 +80,7 @@ void IndexGenerator::AddLineList(int numVerts) *ptr++ = index+i*2; *ptr++ = index+i*2+1; } + indexLen += numVerts; index += numVerts; numPrims += numLines; adds++; @@ -91,6 +95,7 @@ void IndexGenerator::AddLineStrip(int numVerts) *ptr++ = index+i; *ptr++ = index+i+1; } + indexLen += numLines * 2; index += numVerts; numPrims += numLines; adds++; @@ -107,6 +112,7 @@ void IndexGenerator::AddFan(int numVerts) *ptr++ = index+i+1; *ptr++ = index+i+2; } + indexLen += numTris * 3; index += numVerts; numPrims += numTris; adds++; @@ -126,6 +132,7 @@ void IndexGenerator::AddQuads(int numVerts) *ptr++ = index+i*4+2; *ptr++ = index+i*4+3; } + indexLen += numTris * 3; index += numVerts; numPrims += numTris; adds++; diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.h b/Source/Core/VideoCommon/Src/IndexGenerator.h index 537b71d9a2..bde0a4dab4 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.h +++ b/Source/Core/VideoCommon/Src/IndexGenerator.h @@ -35,12 +35,14 @@ public: int GetNumPrims() {return numPrims;} //returns numprimitives int GetNumVerts() {return index;} //returns numprimitives int GetNumAdds() {return adds;} + int GetindexLen() {return indexLen;} bool GetOnlyLists() {return onlyLists;} private: unsigned short *ptr; int numPrims; int index; int adds; + int indexLen; bool onlyLists; }; diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 9b61dfa53f..09d5b16d14 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -602,7 +602,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) // Flush if our vertex format is different from the currently set. if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) { - VertexManager::Flush(); + //Don't flush here we can join some primitives, let the vertex manager do this work + //VertexManager::Flush(); // Also move the Set() here? } g_nativeVertexFmt = m_NativeFmt; diff --git a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp index 3a5936e3fe..e278cb45d9 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp @@ -122,11 +122,12 @@ void VertexShaderManager::SetConstants() for (int i = istart; i < iend; ++i) { u32 color = *(const u32*)(xfmemptr + 3); + float NormalizationCoef = 1 / 255.0f; SetVSConstant4f(C_LIGHTS + 5 * i, - ((color >> 24) & 0xFF) / 255.0f, - ((color >> 16) & 0xFF) / 255.0f, - ((color >> 8) & 0xFF) / 255.0f, - ((color) & 0xFF) / 255.0f); + ((color >> 24) & 0xFF) * NormalizationCoef, + ((color >> 16) & 0xFF) * NormalizationCoef, + ((color >> 8) & 0xFF) * NormalizationCoef, + ((color) & 0xFF) * NormalizationCoef); xfmemptr += 4; for (int j = 0; j < 4; ++j, xfmemptr += 3) @@ -466,11 +467,11 @@ void VertexShaderManager::SetMaterialColor(int index, u32 data) int ind = index * 4; nMaterialsChanged |= (1 << index); - - s_fMaterials[ind++] = ((data >> 24) & 0xFF) / 255.0f; - s_fMaterials[ind++] = ((data >> 16) & 0xFF) / 255.0f; - s_fMaterials[ind++] = ((data >> 8) & 0xFF) / 255.0f; - s_fMaterials[ind] = ( data & 0xFF) / 255.0f; + float NormalizationCoef = 1 / 255.0f; + s_fMaterials[ind++] = ((data >> 24) & 0xFF) * NormalizationCoef; + s_fMaterials[ind++] = ((data >> 16) & 0xFF) * NormalizationCoef; + s_fMaterials[ind++] = ((data >> 8) & 0xFF) * NormalizationCoef; + s_fMaterials[ind] = ( data & 0xFF) * NormalizationCoef; } void VertexShaderManager::TranslateView(float x, float y) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.cpp index c237c71043..c047bfd3ce 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.cpp @@ -58,7 +58,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples m_efbDepth = glObj[1]; glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbColor); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbDepth); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); @@ -119,7 +119,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples m_resolvedDepthTexture = glObj[1]; glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); @@ -343,7 +343,7 @@ void FramebufferManager::copyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight // the image will be allocated by glCopyTexImage2D (later). glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbTexture); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); } @@ -431,7 +431,7 @@ const XFBSource* FramebufferManager::getRealXFBSource(u32 xfbAddr, u32 fbWidth, glGenTextures(1, &m_realXFBSource.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_realXFBSource.texture); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 929247c7da..bed5cc0fc4 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -40,6 +40,8 @@ static GLuint s_DepthMatrixProgram = 0; PixelShaderCache::PSCache PixelShaderCache::pshaders; PIXELSHADERUID PixelShaderCache::s_curuid; bool PixelShaderCache::s_displayCompileAlert; +GLuint PixelShaderCache::CurrentShader; +bool PixelShaderCache::ShaderEnabled; static FRAGMENTSHADER* pShaderLast = NULL; static float lastPSconstants[C_COLORMATRIX+16][4]; @@ -138,6 +140,9 @@ void PixelShaderCache::Init() glDeleteProgramsARB(1, &s_DepthMatrixProgram); s_DepthMatrixProgram = 0; } + CurrentShader=0; + ShaderEnabled = false; + EnableShader(s_DepthMatrixProgram); } void PixelShaderCache::Shutdown() @@ -280,7 +285,9 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr } glGenProgramsARB(1, &ps.glprogid); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid); + EnableShader(ps.glprogid); + //glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid); + //CurrentShader = ps.glprogid; glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); err = GL_REPORT_ERROR(); @@ -312,3 +319,43 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr #endif return true; } + +//Disable Fragment programs and reset the selected Program +void PixelShaderCache::DisableShader() +{ + CurrentShader = 0; + if(ShaderEnabled) + { + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, CurrentShader); + glDisable(GL_FRAGMENT_PROGRAM_ARB); + ShaderEnabled = false; + } +} + + +//bind a program if is diferent from the binded oone +void PixelShaderCache::SetCurrentShader(GLuint Shader) +{ + //The caching here breakes Super Mario Sunshine i'm still trying to figure out wy + if(ShaderEnabled /*&& CurrentShader != Shader*/) + { + CurrentShader = Shader; + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, CurrentShader); + } +} + +//Enable Fragment program and bind initial program +void PixelShaderCache::EnableShader(GLuint Shader) +{ + if(!ShaderEnabled) + { + glEnable(GL_FRAGMENT_PROGRAM_ARB); + ShaderEnabled = true; + CurrentShader = 0; + } + if(CurrentShader != Shader) + { + CurrentShader = Shader; + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, CurrentShader); + } +} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h index 05ef214c37..241e7167b2 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h @@ -63,6 +63,10 @@ class PixelShaderCache static bool s_displayCompileAlert; + static GLuint CurrentShader; + + static bool ShaderEnabled; + public: static void Init(); static void ProgressiveCleanup(); @@ -74,6 +78,12 @@ public: static GLuint GetColorMatrixProgram(); static GLuint GetDepthMatrixProgram(); + + static void SetCurrentShader(GLuint Shader); + + static void DisableShader(); + + static void EnableShader(GLuint Shader); }; #endif // _PIXELSHADERCACHE_H_ diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.cpp index ea06377f04..cb08e5a09c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.cpp @@ -74,14 +74,12 @@ bool ApplyShader() if (s_shader.glprogid != 0) { - glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_shader.glprogid); + PixelShaderCache::EnableShader(s_shader.glprogid); return true; } else { - glDisable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0); + PixelShaderCache::DisableShader(); return false; } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 640312a262..f6d95c1ad1 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -564,8 +564,8 @@ void Renderer::ResetAPIState() { // Gets us to a reasonably sane state where it's possible to do things like // image copies with textured quads, etc. - glDisable(GL_VERTEX_PROGRAM_ARB); - glDisable(GL_FRAGMENT_PROGRAM_ARB); + VertexShaderCache::DisableShader(); + PixelShaderCache::DisableShader(); glDisable(GL_SCISSOR_TEST); glDisable(GL_DEPTH_TEST); @@ -597,8 +597,8 @@ void Renderer::RestoreAPIState() SetColorMask(); SetBlendMode(true); - glEnable(GL_VERTEX_PROGRAM_ARB); - glEnable(GL_FRAGMENT_PROGRAM_ARB); + VertexShaderCache::EnableShader(0); + PixelShaderCache::EnableShader(0); } void Renderer::SetColorMask() @@ -843,7 +843,6 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) { if (s_skipSwap) return; - const XFBSource* xfbSource = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight); if (!xfbSource) { @@ -917,9 +916,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1); glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1); glEnd(); - - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0); - glDisable(GL_FRAGMENT_PROGRAM_ARB); + PixelShaderCache::DisableShader();; } else { @@ -1070,12 +1067,12 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) // --------------------------------------------------------------------- GL_REPORT_ERRORD(); - for (int i = 0; i < 8; i++) { + /*for (int i = 0; i < 8; i++) { glActiveTexture(GL_TEXTURE0 + i); glDisable(GL_TEXTURE_2D); glDisable(GL_TEXTURE_RECTANGLE_ARB); } - glActiveTexture(GL_TEXTURE0); + glActiveTexture(GL_TEXTURE0);*/ DrawDebugText(); @@ -1087,8 +1084,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) OSD::DrawMessages(); if (blend_enabled) glEnable(GL_BLEND); - GL_REPORT_ERRORD(); - + GL_REPORT_ERRORD(); #if defined(DVPROFILE) if (g_bWriteProfile) { //g_bWriteProfile = 0; @@ -1107,8 +1103,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) GL_REPORT_ERRORD(); // Clear framebuffer - glClearColor(0, 0, 0, 0); - glClear(GL_COLOR_BUFFER_BIT); + //glClearColor(0, 0, 0, 0); + //glClear(GL_COLOR_BUFFER_BIT); GL_REPORT_ERRORD(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index efe24a4fcd..fe47191284 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -34,6 +34,8 @@ namespace TextureConverter static GLuint s_texConvFrameBuffer = 0; static GLuint s_srcTexture = 0; // for decoding from RAM +static GLuint s_srcTextureWidth = 0; +static GLuint s_srcTextureHeight = 0; static GLuint s_dstRenderBuffer = 0; // for encoding to RAM const int renderBufferWidth = 1024; @@ -60,15 +62,12 @@ void CreateRgbToYuyvProgram() " float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n" " float3 c0 = texRECT(samp0, uv0).rgb;\n" " float3 c1 = texRECT(samp0, uv1).rgb;\n" - - " float y0 = (0.257f * c0.r) + (0.504f * c0.g) + (0.098f * c0.b) + 0.0625f;\n" - " float u0 =-(0.148f * c0.r) - (0.291f * c0.g) + (0.439f * c0.b) + 0.5f;\n" - " float v0 = (0.439f * c0.r) - (0.368f * c0.g) - (0.071f * c0.b) + 0.5f;\n" - " float y1 = (0.257f * c1.r) + (0.504f * c1.g) + (0.098f * c1.b) + 0.0625f;\n" - " float u1 =-(0.148f * c1.r) - (0.291f * c1.g) + (0.439f * c1.b) + 0.5f;\n" - " float v1 = (0.439f * c1.r) - (0.368f * c1.g) - (0.071f * c1.b) + 0.5f;\n" - - " ocol0 = float4(y1, (u0 + u1) / 2, y0, (v0 + v1) / 2);\n" + " float3 y_const = float3(0.257f,0.504f,0.098f);\n" + " float3 u_const = float3(-0.148f,-0.291f,0.439f);\n" + " float3 v_const = float3(0.439f,-0.368f,-0.071f);\n" + " float4 const3 = float4(0.0625f,0.5f,0.0625f,0.5f);\n" + " float3 c01 = (c0 + c1) * 0.5f;\n" + " ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n" "}\n"; if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) { @@ -204,8 +203,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); - glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader.glprogid); + PixelShaderCache::EnableShader(shader.glprogid); // Draw... glBegin(GL_QUADS); @@ -218,7 +216,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar // .. and then readback the results. // TODO: make this less slow. - glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); + glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destAddr); GL_REPORT_ERRORD(); g_framebufferManager.SetFramebuffer(0); @@ -332,13 +330,21 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture); // TODO: make this less slow. (How?) - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); + if(s_srcTextureWidth == (GLsizei)srcFmtWidth && s_srcTextureHeight == (GLsizei)srcHeight) + { + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,0,0,s_srcTextureWidth, s_srcTextureHeight, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); + } + else + { + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); + s_srcTextureWidth = (GLsizei)srcFmtWidth; + s_srcTextureHeight = (GLsizei)srcHeight; + } glViewport(0, 0, srcWidth, srcHeight); - glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_yuyvToRgbProgram.glprogid); - + PixelShaderCache::EnableShader(s_yuyvToRgbProgram.glprogid); + GL_REPORT_ERRORD(); glBegin(GL_QUADS); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 61fee0aaf4..87f3dcab81 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -513,7 +513,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool { glGenTextures(1, (GLuint *)&entry.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); GL_REPORT_ERRORD(); } else @@ -531,7 +531,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool glDeleteTextures(1,(GLuint *)&entry.texture); glGenTextures(1, (GLuint *)&entry.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); GL_REPORT_ERRORD(); } } @@ -692,8 +692,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool glViewport(0, 0, w, h); - glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); + PixelShaderCache::EnableShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation GL_REPORT_ERRORD(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 87069d221d..f82e802481 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -37,8 +37,9 @@ #include "VertexShaderGen.h" #include "VertexLoader.h" #include "VertexManager.h" +#include "IndexGenerator.h" -#define MAX_BUFFER_SIZE 0x4000 +#define MAX_BUFFER_SIZE 0x50000 // internal state for loading vertices extern NativeVertexFormat *g_nativeVertexFmt; @@ -46,13 +47,17 @@ extern NativeVertexFormat *g_nativeVertexFmt; namespace VertexManager { -static GLuint s_vboBuffers[0x40] = {0}; -static int s_nCurVBOIndex = 0; // current free buffer -static u8 *s_pBaseBufferPointer = NULL; -static std::vector< GLint > s_vertexFirstOffset; -static std::vector< GLsizei > s_vertexGroupSize; -static std::vector< std::pair< GLenum, int > > s_vertexGroups; -u32 s_vertexCount; +static const GLenum c_RenderprimitiveType[8] = +{ + GL_TRIANGLES, + GL_ZERO, //nothing + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_LINES, + GL_LINES, + GL_POINTS +}; static const GLenum c_primitiveType[8] = { @@ -66,35 +71,48 @@ static const GLenum c_primitiveType[8] = GL_POINTS }; +static IndexGenerator indexGen; + +static GLenum lastPrimitive; +static GLenum CurrentRenderPrimitive; + +static u8 *LocalVBuffer; +static u16 *IBuffer; + +#define MAXVBUFFERSIZE 0x50000 +#define MAXIBUFFERSIZE 0x20000 +#define MAXVBOBUFFERCOUNT 0x4 + +static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0}; +static GLuint s_IBuffers[MAXVBOBUFFERCOUNT] = {0}; +static int s_nCurVBOIndex = 0; // current free buffer + + + bool Init() { - s_pBaseBufferPointer = (u8*)AllocateMemoryPages(MAX_BUFFER_SIZE); - s_pCurBufferPointer = s_pBaseBufferPointer; - + lastPrimitive = GL_ZERO; + CurrentRenderPrimitive = GL_ZERO; + LocalVBuffer = new u8[MAXVBUFFERSIZE]; + IBuffer = new u16[MAXIBUFFERSIZE]; + s_pCurBufferPointer = LocalVBuffer; s_nCurVBOIndex = 0; glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); for (u32 i = 0; i < ARRAYSIZE(s_vboBuffers); ++i) { glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[i]); - glBufferData(GL_ARRAY_BUFFER, MAX_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, MAXVBUFFERSIZE, NULL, GL_STREAM_DRAW); } - glEnableClientState(GL_VERTEX_ARRAY); g_nativeVertexFmt = NULL; GL_REPORT_ERRORD(); - return true; } void Shutdown() { - FreeMemoryPages(s_pBaseBufferPointer, MAX_BUFFER_SIZE); s_pBaseBufferPointer = s_pCurBufferPointer = NULL; + delete [] LocalVBuffer; + delete [] IBuffer; glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); - memset(s_vboBuffers, 0, sizeof(s_vboBuffers)); - - s_vertexFirstOffset.resize(0); - s_vertexGroupSize.resize(0); - s_vertexGroups.resize(0); - s_vertexCount = 0; s_nCurVBOIndex = 0; ResetBuffer(); } @@ -102,58 +120,85 @@ void Shutdown() void ResetBuffer() { s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); - s_pCurBufferPointer = s_pBaseBufferPointer; - s_vertexFirstOffset.resize(0); - s_vertexGroupSize.resize(0); - s_vertexGroups.resize(0); - s_vertexCount = 0; + s_pCurBufferPointer = LocalVBuffer; + CurrentRenderPrimitive = GL_ZERO; + u16 *ptr = 0; + indexGen.Start((unsigned short*)ptr); +} + +void AddIndices(int _primitive, int _numVertices) +{ + switch (_primitive) + { + case GL_QUADS: indexGen.AddQuads(_numVertices); return; + case GL_TRIANGLES: indexGen.AddList(_numVertices); return; + case GL_TRIANGLE_STRIP: indexGen.AddStrip(_numVertices); return; + case GL_TRIANGLE_FAN: indexGen.AddFan(_numVertices); return; + case GL_LINE_STRIP: indexGen.AddLineStrip(_numVertices); return; + case GL_LINES: indexGen.AddLineList(_numVertices); return; + case GL_POINTS: indexGen.AddPoints(_numVertices); return; + } } int GetRemainingSize() { - return MAX_BUFFER_SIZE - (int)(s_pCurBufferPointer - s_pBaseBufferPointer); + return LocalVBuffer + MAXVBUFFERSIZE - s_pCurBufferPointer; } void AddVertices(int primitive, int numvertices) { - _assert_(numvertices > 0); - _assert_(g_nativeVertexFmt != NULL); + if (numvertices <= 0) + return; + if (c_primitiveType[primitive] == GL_ZERO) + return; + DVSTARTPROFILE(); + + lastPrimitive = c_primitiveType[primitive]; + ADDSTAT(stats.thisFrame.numPrims, numvertices); - if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) { - // We can join primitives for free here. Not likely to help much, though, but whatever... - if (c_primitiveType[primitive] == GL_TRIANGLES || - c_primitiveType[primitive] == GL_LINES || - c_primitiveType[primitive] == GL_POINTS || - c_primitiveType[primitive] == GL_QUADS) { - INCSTAT(stats.thisFrame.numPrimitiveJoins); - // Easy join - s_vertexGroupSize.back() += numvertices; - s_vertexCount += numvertices; - return; + + if (CurrentRenderPrimitive != c_RenderprimitiveType[primitive]) + { + // We are NOT collecting the right type. + Flush(); + CurrentRenderPrimitive = c_RenderprimitiveType[primitive]; + u16 *ptr = 0; + if (lastPrimitive != GL_POINTS) + { + ptr = IBuffer; } + indexGen.Start((unsigned short*)ptr); + AddIndices(c_primitiveType[primitive], numvertices); } + else // We are collecting the right type, keep going + { + INCSTAT(stats.thisFrame.numPrimitiveJoins); + AddIndices(c_primitiveType[primitive], numvertices); + } +} - s_vertexFirstOffset.push_back(s_vertexCount); - s_vertexGroupSize.push_back(numvertices); - s_vertexCount += numvertices; - if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) - s_vertexGroups.back().second++; +inline void Draw(int numVertices, int indexLen) +{ + + if (CurrentRenderPrimitive != GL_POINT) + { + glDrawElements(CurrentRenderPrimitive, indexLen, GL_UNSIGNED_SHORT, IBuffer); + INCSTAT(stats.thisFrame.numIndexedDrawCalls); + } else - s_vertexGroups.push_back(std::make_pair(c_primitiveType[primitive], 1)); - -#if defined(_DEBUG) || defined(DEBUGFAST) - static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"}; - PRIM_LOG("prim: %s, c=%d", sprims[primitive], numvertices); -#endif + { + glDrawArrays(CurrentRenderPrimitive,0,numVertices); + INCSTAT(stats.thisFrame.numDrawCalls); + } + } void Flush() { - if (s_vertexCount == 0) - return; - - _assert_(s_pCurBufferPointer != s_pBaseBufferPointer); + if (LocalVBuffer == s_pCurBufferPointer) return; + int numVerts = indexGen.GetNumVerts(); + if(numVerts == 0) return; #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens, @@ -187,9 +232,10 @@ void Flush() GL_REPORT_ERRORD(); - + + glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); - glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER,0, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer); GL_REPORT_ERRORD(); // setup the pointers @@ -266,54 +312,35 @@ void Flush() // finally bind - // TODO - cache progid, check if same as before. Maybe GL does this internally, though. - // This is the really annoying problem with GL - you never know whether it's worth caching stuff yourself. - if (vs) glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs->glprogid); - if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid); // Lego Star Wars crashes here. + int groupStart = 0; + if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid); + if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here. #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG(""); #endif - int groupStart = 0; - for (unsigned i = 0; i < s_vertexGroups.size(); i++) - { - INCSTAT(stats.thisFrame.numDrawCalls); - glMultiDrawArrays(s_vertexGroups[i].first, - &s_vertexFirstOffset[groupStart], - &s_vertexGroupSize[groupStart], - s_vertexGroups[i].second); - groupStart += s_vertexGroups[i].second; - } - + int numIndexes = indexGen.GetindexLen(); + Draw(numVerts,numIndexes); + // run through vertex groups again to set alpha if (!g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) { ps = PixelShaderCache::GetShader(true); - if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid); + if (ps)PixelShaderCache::SetCurrentShader(ps->glprogid); - // only update alpha - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); + // only update alpha + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); - glDisable(GL_BLEND); + glDisable(GL_BLEND); - groupStart = 0; - for (unsigned i = 0; i < s_vertexGroups.size(); i++) - { - INCSTAT(stats.thisFrame.numDrawCalls); - glMultiDrawArrays(s_vertexGroups[i].first, - &s_vertexFirstOffset[groupStart], - &s_vertexGroupSize[groupStart], - s_vertexGroups[i].second); - groupStart += s_vertexGroups[i].second; - } + Draw(numVerts,numIndexes); + // restore color mask + Renderer::SetColorMask(); - // restore color mask - Renderer::SetColorMask(); - - if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract) - glEnable(GL_BLEND); + if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract) + glEnable(GL_BLEND); } #if defined(_DEBUG) || defined(DEBUGFAST) @@ -342,5 +369,5 @@ void Flush() ResetBuffer(); } - } // namespace + diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.h index d52ef0be88..420d0d4b2e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.h @@ -28,7 +28,7 @@ namespace VertexManager bool Init(); void Shutdown(); - +void AddIndices(int _primitive, int _numVertices); void ResetBuffer(); }; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index a2553e4037..9d07d36b28 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -38,6 +38,8 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders; bool VertexShaderCache::s_displayCompileAlert; +GLuint VertexShaderCache::CurrentShader; +bool VertexShaderCache::ShaderEnabled; static VERTEXSHADER *pShaderLast = NULL; static int s_nMaxVertexInstructions; @@ -119,6 +121,9 @@ void VertexShaderCache::Init() s_displayCompileAlert = true; glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions); + ShaderEnabled = false; + CurrentShader = 0; + EnableShader(0); } void VertexShaderCache::Shutdown() @@ -243,9 +248,10 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr plocal = strstr(plocal + 13, "program.local"); } glGenProgramsARB(1, &vs.glprogid); - glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid); - glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); - + EnableShader(vs.glprogid); + //glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid); + //CurrentShader = vs.glprogid; + glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); err = GL_REPORT_ERROR(); if (err != GL_NO_ERROR) { ERROR_LOG(VIDEO, pstrprogram); @@ -257,6 +263,42 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr #if defined(_DEBUG) || defined(DEBUGFAST) vs.strprog = pstrprogram; #endif - + return true; } + +void VertexShaderCache::DisableShader() +{ + if(ShaderEnabled) + { + CurrentShader = 0; + glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); + glDisable(GL_VERTEX_PROGRAM_ARB); + ShaderEnabled = false; + } +} + +void VertexShaderCache::SetCurrentShader(GLuint Shader) +{ + if(ShaderEnabled && CurrentShader != Shader) + { + CurrentShader = Shader; + glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); + } +} + +void VertexShaderCache::EnableShader(GLuint Shader) +{ + if(!ShaderEnabled) + { + glEnable(GL_VERTEX_PROGRAM_ARB); + ShaderEnabled= true; + CurrentShader = 0; + } + if(CurrentShader != Shader) + { + CurrentShader = Shader; + glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); + } +} + diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h index 4f786b83cb..9bdc140469 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h @@ -53,6 +53,9 @@ class VertexShaderCache static VSCache vshaders; static bool s_displayCompileAlert; + + static GLuint CurrentShader; + static bool ShaderEnabled; public: static void Init(); @@ -61,6 +64,11 @@ public: static VERTEXSHADER* GetShader(u32 components); static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram); + + static void SetCurrentShader(GLuint Shader); + + static void DisableShader(); + static void EnableShader(GLuint Shader); }; #endif // _VERTEXSHADERCACHE_H_ \ No newline at end of file diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 9e1cff9e14..f45d5110a4 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -513,6 +513,7 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) // Make sure previous swap request has made it to the screen if (g_VideoInitialize.bUseDualCore) { + while (Common::AtomicLoadAcquire(s_swapRequested)) Common::YieldCPU(); }