From 33c24f0a15161de23c26c61e2fc6efeb73def6df Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Wed, 7 Dec 2011 22:04:34 -0600 Subject: [PATCH] Almost there. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 2 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 2 +- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 218 +++++++++++------- .../Plugin_VideoOGL/Src/TextureCache.cpp | 9 +- .../Plugin_VideoOGL/Src/VertexManager.cpp | 29 ++- .../Plugin_VideoOGL/Src/VertexShaderCache.cpp | 3 +- 6 files changed, 175 insertions(+), 88 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 5d7fa9790e..d3d59cde63 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -1263,7 +1263,7 @@ static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode compindex = bpmem.alphaFunc.comp1 % 8; WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - WRITE(p, ")){ocol0 = 0;%s%s discard;%s}\n", + WRITE(p, ")){ocol0 = float4(0.0);%s%s discard;%s}\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "", DepthTextureEnable ? "depth = 1.f;" : "", (ApiType != API_D3D11) ? "return;" : ""); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index ac35a4aa35..9a6a220979 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -291,7 +291,7 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) } else { - WRITE(p, "int posmtx = fposmtx;\n"); + WRITE(p, "int posmtx = int(fposmtx);\n"); } WRITE(p, "float4 pos = float4(dot("I_TRANSFORMMATRICES"[posmtx], rawpos), dot("I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot("I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n"); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 601445988f..6c67252dde 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -38,8 +38,8 @@ namespace OGL { static int s_nMaxPixelInstructions; -static GLuint s_ColorMatrixProgram = 0; -static GLuint s_DepthMatrixProgram = 0; +static FRAGMENTSHADER s_ColorMatrixProgram; +static FRAGMENTSHADER s_DepthMatrixProgram; PixelShaderCache::PSCache PixelShaderCache::PixelShaders; PIXELSHADERUID PixelShaderCache::s_curuid; bool PixelShaderCache::s_displayCompileAlert; @@ -56,12 +56,12 @@ bool (*pCompilePixelShader)(FRAGMENTSHADER&, const char*); GLuint PixelShaderCache::GetDepthMatrixProgram() { - return s_DepthMatrixProgram; + return s_DepthMatrixProgram.glprogid; } GLuint PixelShaderCache::GetColorMatrixProgram() { - return s_ColorMatrixProgram; + return s_ColorMatrixProgram.glprogid; } void PixelShaderCache::Init() @@ -105,98 +105,154 @@ void PixelShaderCache::Init() glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&maxinst); glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs); INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs); + if(g_ActiveConfig.bUseGLSL) + { + char pmatrixprog[2048]; + sprintf(pmatrixprog, "#extension GL_ARB_texture_rectangle : enable\n" + "uniform sampler2DRect samp0;\n" + "uniform vec4 "I_COLORS"[7];\n" + "void main(){\n" + "vec4 Temp0, Temp1;\n" + "vec4 K0 = vec4(0.5, 0.5, 0.5, 0.5);\n" + "Temp0 = texture2DRect(samp0, gl_TexCoord[0].xy);\n" + "Temp0 = Temp0 * "I_COLORS"[%d];\n" + "Temp0 = Temp0 + K0;\n" + "Temp0 = floor(Temp0);\n" + "Temp0 = Temp0 * "I_COLORS"[%d];\n" + "Temp1.x = dot(Temp0, "I_COLORS"[%d]);\n" + "Temp1.y = dot(Temp0, "I_COLORS"[%d]);\n" + "Temp1.z = dot(Temp0, "I_COLORS"[%d]);\n" + "Temp1.w = dot(Temp0, "I_COLORS"[%d]);\n" + "gl_FragData[0] = Temp1 + "I_COLORS"[%d];\n" + "}\n", C_COLORS+5, C_COLORS+6, C_COLORS, C_COLORS+1, C_COLORS+2, C_COLORS+3, C_COLORS+4); + if(!pCompilePixelShader(s_ColorMatrixProgram, pmatrixprog)) + { + ERROR_LOG(VIDEO, "Failed to create color matrix fragment program"); + s_ColorMatrixProgram.Destroy(); + } + sprintf(pmatrixprog, "#extension GL_ARB_texture_rectangle : enable\n" + "uniform sampler2DRect samp0;\n" + "uniform vec4 "I_COLORS"[5];\n" + "void main(){\n" + "vec4 R0, R1, R2;\n" + "vec4 K0 = vec4(255.99998474121, 0.003921568627451, 256.0, 0.0);\n" + "vec4 K1 = vec4(15.0, 0.066666666666, 0.0, 0.0);\n" + "R2 = texture2DRect(samp0, gl_TexCoord[0].xy);\n" + "R0.x = R2.x * K0.x;\n" + "R0.x = floor(R0).x;\n" + "R0.yzw = (R0 - R0.x).yzw;\n" + "R0.yzw = (R0 * K0.z).yzw;\n" + "R0.y = floor(R0).y;\n" + "R0.zw = (R0 - R0.y).zw;\n" + "R0.zw = (R0 * K0.z).zw;\n" + "R0.z = floor(R0).z;\n" + "R0.w = R0.x;\n" + "R0 = R0 * K0.y;\n" + "R0.w = (R0 * K1.x).w;\n" + "R0.w = floor(R0).w;\n" + "R0.w = (R0 * K1.y).w;\n" + "R1.x = dot(R0, "I_COLORS"[%d]);\n" + "R1.y = dot(R0, "I_COLORS"[%d]);\n" + "R1.z = dot(R0, "I_COLORS"[%d]);\n" + "R1.w = dot(R0, "I_COLORS"[%d]);\n" + "gl_FragData[0] = R1 * "I_COLORS"[%d];\n" + "}\n", C_COLORS, C_COLORS+1, C_COLORS+2, C_COLORS+3, C_COLORS+4); + if(!pCompilePixelShader(s_DepthMatrixProgram, pmatrixprog)) + { + ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); + s_DepthMatrixProgram.Destroy(); + } + } + else + { + char pmatrixprog[2048]; + sprintf(pmatrixprog, "!!ARBfp1.0" + "TEMP R0;\n" + "TEMP R1;\n" + "PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n" + "TEX R0, fragment.texcoord[0], texture[0], RECT;\n" + "MUL R0, R0, program.env[%d];\n" + "ADD R0, R0, K0;\n" + "FLR R0, R0;\n" + "MUL R0, R0, program.env[%d];\n" + "DP4 R1.x, R0, program.env[%d];\n" + "DP4 R1.y, R0, program.env[%d];\n" + "DP4 R1.z, R0, program.env[%d];\n" + "DP4 R1.w, R0, program.env[%d];\n" + "ADD result.color, R1, program.env[%d];\n" + "END\n",C_COLORS+5,C_COLORS+6, C_COLORS, C_COLORS+1, C_COLORS+2, C_COLORS+3, C_COLORS+4); + glGenProgramsARB(1, &s_ColorMatrixProgram.glprogid); + SetCurrentShader(s_ColorMatrixProgram.glprogid); + glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); - char pmatrixprog[2048]; - sprintf(pmatrixprog, "!!ARBfp1.0" - "TEMP R0;\n" - "TEMP R1;\n" - "PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n" - "TEX R0, fragment.texcoord[0], texture[0], RECT;\n" - "MUL R0, R0, program.env[%d];\n" - "ADD R0, R0, K0;\n" - "FLR R0, R0;\n" - "MUL R0, R0, program.env[%d];\n" - "DP4 R1.x, R0, program.env[%d];\n" - "DP4 R1.y, R0, program.env[%d];\n" - "DP4 R1.z, R0, program.env[%d];\n" - "DP4 R1.w, R0, program.env[%d];\n" - "ADD result.color, R1, program.env[%d];\n" - "END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4); - glGenProgramsARB(1, &s_ColorMatrixProgram); - SetCurrentShader(s_ColorMatrixProgram); - glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); + GLenum err = GL_REPORT_ERROR(); + if (err != GL_NO_ERROR) { + ERROR_LOG(VIDEO, "Failed to create color matrix fragment program"); + s_ColorMatrixProgram.Destroy(); + } - GLenum err = GL_REPORT_ERROR(); - if (err != GL_NO_ERROR) { - ERROR_LOG(VIDEO, "Failed to create color matrix fragment program"); - glDeleteProgramsARB(1, &s_ColorMatrixProgram); - s_ColorMatrixProgram = 0; - } + sprintf(pmatrixprog, "!!ARBfp1.0\n" + "TEMP R0;\n" + "TEMP R1;\n" + "TEMP R2;\n" + //16777215/16777216*256, 1/255, 256, 0 + "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" + "PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n" + //sample the depth value + "TEX R2, fragment.texcoord[0], texture[0], RECT;\n" - sprintf(pmatrixprog, "!!ARBfp1.0\n" - "TEMP R0;\n" - "TEMP R1;\n" - "TEMP R2;\n" - //16777215/16777216*256, 1/255, 256, 0 - "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" - "PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n" - //sample the depth value - "TEX R2, fragment.texcoord[0], texture[0], RECT;\n" + //scale from [0*16777216..1*16777216] to + //[0*16777215..1*16777215], multiply by 256 + "MUL R0, R2.x, K0.x;\n" // *16777215/16777216*256 - //scale from [0*16777216..1*16777216] to - //[0*16777215..1*16777215], multiply by 256 - "MUL R0, R2.x, K0.x;\n" // *16777215/16777216*256 + //It is easy to get bad results due to low precision + //here, for example converting like this: + //MUL R0,R0,{ 65536, 256, 1, 16777216 } + //FRC R0,R0 + //gives {?, 128/255, 254/255, ?} for depth value 254/255 + //on some gpus - //It is easy to get bad results due to low precision - //here, for example converting like this: - //MUL R0,R0,{ 65536, 256, 1, 16777216 } - //FRC R0,R0 - //gives {?, 128/255, 254/255, ?} for depth value 254/255 - //on some gpus + "FLR R0.x,R0;\n" //bits 31..24 - "FLR R0.x,R0;\n" //bits 31..24 + "SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest + "MUL R0.yzw,R0,K0.z;\n" // *256 + "FLR R0.y,R0;\n" //bits 23..16 - "SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest - "MUL R0.yzw,R0,K0.z;\n" // *256 - "FLR R0.y,R0;\n" //bits 23..16 + "SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest + "MUL R0.zw,R0,K0.z;\n" // *256 + "FLR R0.z,R0;\n" //bits 15..8 - "SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest - "MUL R0.zw,R0,K0.z;\n" // *256 - "FLR R0.z,R0;\n" //bits 15..8 + "MOV R0.w,R0.x;\n" //duplicate bit 31..24 - "MOV R0.w,R0.x;\n" //duplicate bit 31..24 + "MUL R0,R0,K0.y;\n" // /255 - "MUL R0,R0,K0.y;\n" // /255 + "MUL R0.w,R0,K1.x;\n" // *15 + "FLR R0.w,R0;\n" //bits 31..28 + "MUL R0.w,R0,K1.y;\n" // /15 - "MUL R0.w,R0,K1.x;\n" // *15 - "FLR R0.w,R0;\n" //bits 31..28 - "MUL R0.w,R0,K1.y;\n" // /15 + "DP4 R1.x, R0, program.env[%d];\n" + "DP4 R1.y, R0, program.env[%d];\n" + "DP4 R1.z, R0, program.env[%d];\n" + "DP4 R1.w, R0, program.env[%d];\n" + "ADD result.color, R1, program.env[%d];\n" + "END\n", C_COLORS, C_COLORS+1, C_COLORS+2, C_COLORS+3, C_COLORS+4); + glGenProgramsARB(1, &s_DepthMatrixProgram.glprogid); + SetCurrentShader(s_DepthMatrixProgram.glprogid); + glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); - "DP4 R1.x, R0, program.env[%d];\n" - "DP4 R1.y, R0, program.env[%d];\n" - "DP4 R1.z, R0, program.env[%d];\n" - "DP4 R1.w, R0, program.env[%d];\n" - "ADD result.color, R1, program.env[%d];\n" - "END\n", C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4); - glGenProgramsARB(1, &s_DepthMatrixProgram); - SetCurrentShader(s_DepthMatrixProgram); - glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); - - err = GL_REPORT_ERROR(); - if (err != GL_NO_ERROR) { - ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); - glDeleteProgramsARB(1, &s_DepthMatrixProgram); - s_DepthMatrixProgram = 0; - } + err = GL_REPORT_ERROR(); + if (err != GL_NO_ERROR) { + ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); + s_DepthMatrixProgram.Destroy(); + } + } } void PixelShaderCache::Shutdown() { - glDeleteProgramsARB(1, &s_ColorMatrixProgram); - s_ColorMatrixProgram = 0; - glDeleteProgramsARB(1, &s_DepthMatrixProgram); - s_DepthMatrixProgram = 0; + s_ColorMatrixProgram.Destroy(); + s_DepthMatrixProgram.Destroy(); PSCache::iterator iter = PixelShaders.begin(); for (; iter != PixelShaders.end(); iter++) iter->second.Destroy(); @@ -235,7 +291,6 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp // Make an entry in the table PSCacheEntry& newentry = PixelShaders[uid]; last_entry = &newentry; - newentry.shader.bGLSL = g_ActiveConfig.bUseGLSL; const char *code = GeneratePixelShaderCode(dstAlphaMode, g_ActiveConfig.bUseGLSL ? API_GLSL : API_OPENGL, components); if (g_ActiveConfig.bEnableShaderDebugging && code) @@ -320,7 +375,6 @@ bool CompileGLSLPixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) FILE *fp = fopen(szTemp, "wb"); fwrite(pstrprogram, strlen(pstrprogram), 1, fp); fclose(fp); - if(strstr(infoLog, "warning") != NULL || strstr(infoLog, "error") != NULL) exit(0); delete[] infoLog; @@ -339,6 +393,7 @@ bool CompileGLSLPixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) (void)GL_REPORT_ERROR(); ps.glprogid = result; + ps.bGLSL = true; return true; } void PixelShaderCache::SetPSSampler(const char * name, unsigned int Tex) @@ -462,6 +517,7 @@ bool CompileCGPixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) } glGenProgramsARB(1, &ps.glprogid); + ps.bGLSL = false; PixelShaderCache::SetCurrentShader(ps.glprogid); glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp index 4a9431a563..d3a1283627 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp @@ -42,6 +42,7 @@ #include "ImageWrite.h" #include "MemoryUtil.h" #include "PixelShaderCache.h" +#include "ProgramShaderCache.h" #include "PixelShaderManager.h" #include "Render.h" #include "Statistics.h" @@ -296,7 +297,13 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo glViewport(0, 0, virtualW, virtualH); - PixelShaderCache::SetCurrentShader((srcFormat == PIXELFMT_Z24) ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); + if(g_ActiveConfig.bUseGLSL) + { + ProgramShaderCache::SetBothShaders((srcFormat == PIXELFMT_Z24) ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram(), 0); + PixelShaderCache::SetPSSampler("samp0", 0); + } + else + PixelShaderCache::SetCurrentShader((srcFormat == PIXELFMT_Z24) ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); PixelShaderManager::SetColorMatrix(colmat); // set transformation GL_REPORT_ERRORD(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 2c97b904b2..ebca599fe8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -129,9 +129,6 @@ void VertexManager::vFlush() //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); GL_REPORT_ERRORD(); - // setup the pointers - if (g_nativeVertexFmt) - g_nativeVertexFmt->SetupVertexPointers(); GL_REPORT_ERRORD(); u32 usedtextures = 0; @@ -221,6 +218,21 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); PixelShaderManager::SetConstants(); + + // setup the pointers + if (g_nativeVertexFmt) + g_nativeVertexFmt->SetupVertexPointers(); + GL_REPORT_ERRORD(); + if(g_ActiveConfig.bUseGLSL) + for (int i = 0; i < 8; i++) + { + if (usedtextures & (1 << i)) + { + char tmp[16]; + sprintf(tmp, "samp%d", i); // Bake this in to something so we don't have to sprintf? + PixelShaderCache::SetPSSampler(tmp, i); + } + } Draw(); @@ -232,6 +244,17 @@ void VertexManager::vFlush() { ProgramShaderCache::SetBothShaders(ps->glprogid, 0); PixelShaderManager::SetConstants(); // Need to set these again + if (g_nativeVertexFmt) + g_nativeVertexFmt->SetupVertexPointers(); + for (int i = 0; i < 8; i++) + { + if (usedtextures & (1 << i)) + { + char tmp[16]; + sprintf(tmp, "samp%d", i); // Bake this in to something so we don't have to sprintf? + PixelShaderCache::SetPSSampler(tmp, i); + } + } } else if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index c543dfb86e..34c8020a00 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -125,7 +125,6 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components) // Make an entry in the table VSCacheEntry& entry = vshaders[uid]; last_entry = &entry; - entry.shader.bGLSL = g_ActiveConfig.bUseGLSL; const char *code = GenerateVertexShaderCode(components, g_ActiveConfig.bUseGLSL ? API_GLSL : API_OPENGL); GetSafeVertexShaderId(&entry.safe_uid, components); @@ -223,6 +222,7 @@ bool CompileGLSLVertexShader(VERTEXSHADER& vs, const char* pstrprogram) (void)GL_REPORT_ERROR(); vs.glprogid = result; + vs.bGLSL = true; return true; } void SetVSConstant4fvByName(const char * name, unsigned int offset, const float *f, const unsigned int count = 1) @@ -352,6 +352,7 @@ bool CompileCGVertexShader(VERTEXSHADER& vs, const char* pstrprogram) plocal = strstr(plocal + 13, "program.local"); } glGenProgramsARB(1, &vs.glprogid); + vs.bGLSL = false; VertexShaderCache::SetCurrentShader(vs.glprogid); glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);