OpenGL: commit rodolfoosvaldobogado's (what a name!) speedup patches.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4322 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-09-26 12:39:12 +00:00
parent 5a7fbd63c2
commit 29808cdde2
16 changed files with 295 additions and 150 deletions

View File

@ -32,6 +32,7 @@ void IndexGenerator::Start(unsigned short *startptr)
index = 0; index = 0;
numPrims = 0; numPrims = 0;
adds = 0; adds = 0;
indexLen = 0;
onlyLists = true; onlyLists = true;
} }
@ -45,6 +46,7 @@ void IndexGenerator::AddList(int numVerts)
*ptr++ = index+i*3+1; *ptr++ = index+i*3+1;
*ptr++ = index+i*3+2; *ptr++ = index+i*3+2;
} }
indexLen += numVerts;
index += numVerts; index += numVerts;
numPrims += numTris; numPrims += numTris;
adds++; adds++;
@ -62,6 +64,7 @@ void IndexGenerator::AddStrip(int numVerts)
*ptr++ = index+i+(wind?1:2); *ptr++ = index+i+(wind?1:2);
wind = !wind; wind = !wind;
} }
indexLen += numTris * 3;
index += numVerts; index += numVerts;
numPrims += numTris; numPrims += numTris;
adds++; adds++;
@ -77,6 +80,7 @@ void IndexGenerator::AddLineList(int numVerts)
*ptr++ = index+i*2; *ptr++ = index+i*2;
*ptr++ = index+i*2+1; *ptr++ = index+i*2+1;
} }
indexLen += numVerts;
index += numVerts; index += numVerts;
numPrims += numLines; numPrims += numLines;
adds++; adds++;
@ -91,6 +95,7 @@ void IndexGenerator::AddLineStrip(int numVerts)
*ptr++ = index+i; *ptr++ = index+i;
*ptr++ = index+i+1; *ptr++ = index+i+1;
} }
indexLen += numLines * 2;
index += numVerts; index += numVerts;
numPrims += numLines; numPrims += numLines;
adds++; adds++;
@ -107,6 +112,7 @@ void IndexGenerator::AddFan(int numVerts)
*ptr++ = index+i+1; *ptr++ = index+i+1;
*ptr++ = index+i+2; *ptr++ = index+i+2;
} }
indexLen += numTris * 3;
index += numVerts; index += numVerts;
numPrims += numTris; numPrims += numTris;
adds++; adds++;
@ -126,6 +132,7 @@ void IndexGenerator::AddQuads(int numVerts)
*ptr++ = index+i*4+2; *ptr++ = index+i*4+2;
*ptr++ = index+i*4+3; *ptr++ = index+i*4+3;
} }
indexLen += numTris * 3;
index += numVerts; index += numVerts;
numPrims += numTris; numPrims += numTris;
adds++; adds++;

View File

@ -35,12 +35,14 @@ public:
int GetNumPrims() {return numPrims;} //returns numprimitives int GetNumPrims() {return numPrims;} //returns numprimitives
int GetNumVerts() {return index;} //returns numprimitives int GetNumVerts() {return index;} //returns numprimitives
int GetNumAdds() {return adds;} int GetNumAdds() {return adds;}
int GetindexLen() {return indexLen;}
bool GetOnlyLists() {return onlyLists;} bool GetOnlyLists() {return onlyLists;}
private: private:
unsigned short *ptr; unsigned short *ptr;
int numPrims; int numPrims;
int index; int index;
int adds; int adds;
int indexLen;
bool onlyLists; bool onlyLists;
}; };

View File

@ -602,7 +602,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
// Flush if our vertex format is different from the currently set. // Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{ {
VertexManager::Flush(); //Don't flush here we can join some primitives, let the vertex manager do this work
//VertexManager::Flush();
// Also move the Set() here? // Also move the Set() here?
} }
g_nativeVertexFmt = m_NativeFmt; g_nativeVertexFmt = m_NativeFmt;

View File

@ -122,11 +122,12 @@ void VertexShaderManager::SetConstants()
for (int i = istart; i < iend; ++i) for (int i = istart; i < iend; ++i)
{ {
u32 color = *(const u32*)(xfmemptr + 3); u32 color = *(const u32*)(xfmemptr + 3);
float NormalizationCoef = 1 / 255.0f;
SetVSConstant4f(C_LIGHTS + 5 * i, SetVSConstant4f(C_LIGHTS + 5 * i,
((color >> 24) & 0xFF) / 255.0f, ((color >> 24) & 0xFF) * NormalizationCoef,
((color >> 16) & 0xFF) / 255.0f, ((color >> 16) & 0xFF) * NormalizationCoef,
((color >> 8) & 0xFF) / 255.0f, ((color >> 8) & 0xFF) * NormalizationCoef,
((color) & 0xFF) / 255.0f); ((color) & 0xFF) * NormalizationCoef);
xfmemptr += 4; xfmemptr += 4;
for (int j = 0; j < 4; ++j, xfmemptr += 3) for (int j = 0; j < 4; ++j, xfmemptr += 3)
@ -466,11 +467,11 @@ void VertexShaderManager::SetMaterialColor(int index, u32 data)
int ind = index * 4; int ind = index * 4;
nMaterialsChanged |= (1 << index); nMaterialsChanged |= (1 << index);
float NormalizationCoef = 1 / 255.0f;
s_fMaterials[ind++] = ((data >> 24) & 0xFF) / 255.0f; s_fMaterials[ind++] = ((data >> 24) & 0xFF) * NormalizationCoef;
s_fMaterials[ind++] = ((data >> 16) & 0xFF) / 255.0f; s_fMaterials[ind++] = ((data >> 16) & 0xFF) * NormalizationCoef;
s_fMaterials[ind++] = ((data >> 8) & 0xFF) / 255.0f; s_fMaterials[ind++] = ((data >> 8) & 0xFF) * NormalizationCoef;
s_fMaterials[ind] = ( data & 0xFF) / 255.0f; s_fMaterials[ind] = ( data & 0xFF) * NormalizationCoef;
} }
void VertexShaderManager::TranslateView(float x, float y) void VertexShaderManager::TranslateView(float x, float y)

View File

@ -58,7 +58,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples
m_efbDepth = glObj[1]; m_efbDepth = glObj[1];
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbColor); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbColor);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbDepth); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbDepth);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
@ -119,7 +119,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples
m_resolvedDepthTexture = glObj[1]; m_resolvedDepthTexture = glObj[1];
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
@ -343,7 +343,7 @@ void FramebufferManager::copyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight
// the image will be allocated by glCopyTexImage2D (later). // the image will be allocated by glCopyTexImage2D (later).
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbTexture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
} }
@ -431,7 +431,7 @@ const XFBSource* FramebufferManager::getRealXFBSource(u32 xfbAddr, u32 fbWidth,
glGenTextures(1, &m_realXFBSource.texture); glGenTextures(1, &m_realXFBSource.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_realXFBSource.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_realXFBSource.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
} }

View File

@ -40,6 +40,8 @@ static GLuint s_DepthMatrixProgram = 0;
PixelShaderCache::PSCache PixelShaderCache::pshaders; PixelShaderCache::PSCache PixelShaderCache::pshaders;
PIXELSHADERUID PixelShaderCache::s_curuid; PIXELSHADERUID PixelShaderCache::s_curuid;
bool PixelShaderCache::s_displayCompileAlert; bool PixelShaderCache::s_displayCompileAlert;
GLuint PixelShaderCache::CurrentShader;
bool PixelShaderCache::ShaderEnabled;
static FRAGMENTSHADER* pShaderLast = NULL; static FRAGMENTSHADER* pShaderLast = NULL;
static float lastPSconstants[C_COLORMATRIX+16][4]; static float lastPSconstants[C_COLORMATRIX+16][4];
@ -138,6 +140,9 @@ void PixelShaderCache::Init()
glDeleteProgramsARB(1, &s_DepthMatrixProgram); glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0; s_DepthMatrixProgram = 0;
} }
CurrentShader=0;
ShaderEnabled = false;
EnableShader(s_DepthMatrixProgram);
} }
void PixelShaderCache::Shutdown() void PixelShaderCache::Shutdown()
@ -280,7 +285,9 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
} }
glGenProgramsARB(1, &ps.glprogid); glGenProgramsARB(1, &ps.glprogid);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid); EnableShader(ps.glprogid);
//glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid);
//CurrentShader = ps.glprogid;
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
err = GL_REPORT_ERROR(); err = GL_REPORT_ERROR();
@ -312,3 +319,43 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
#endif #endif
return true; return true;
} }
// Turns fragment programs off and forgets the cached program id.
// CurrentShader is cleared on every call; the GL unbind and glDisable are
// only issued while the cache records fragment programs as enabled.
void PixelShaderCache::DisableShader()
{
	CurrentShader = 0;
	if (!ShaderEnabled)
		return;

	// Bind program 0 before disabling the fragment program target.
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
	glDisable(GL_FRAGMENT_PROGRAM_ARB);
	ShaderEnabled = false;
}
// Binds Shader as the current fragment program, but only while fragment
// programs are enabled; otherwise the call does nothing.
// The bind is intentionally unconditional: skipping it when Shader already
// equals CurrentShader broke Super Mario Sunshine (cause still unknown), so
// that comparison remains disabled.
void PixelShaderCache::SetCurrentShader(GLuint Shader)
{
	if (!ShaderEnabled)
		return;

	// No "CurrentShader != Shader" short-circuit here on purpose.
	CurrentShader = Shader;
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, Shader);
}
// Makes sure fragment programs are enabled, then binds Shader unless the
// cache already records it as the bound program.
void PixelShaderCache::EnableShader(GLuint Shader)
{
	if (!ShaderEnabled)
	{
		glEnable(GL_FRAGMENT_PROGRAM_ARB);
		// Reset the cached id so any non-zero Shader is rebound below.
		CurrentShader = 0;
		ShaderEnabled = true;
	}

	if (CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, Shader);
}

View File

@ -63,6 +63,10 @@ class PixelShaderCache
static bool s_displayCompileAlert; static bool s_displayCompileAlert;
static GLuint CurrentShader;
static bool ShaderEnabled;
public: public:
static void Init(); static void Init();
static void ProgressiveCleanup(); static void ProgressiveCleanup();
@ -74,6 +78,12 @@ public:
static GLuint GetColorMatrixProgram(); static GLuint GetColorMatrixProgram();
static GLuint GetDepthMatrixProgram(); static GLuint GetDepthMatrixProgram();
static void SetCurrentShader(GLuint Shader);
static void DisableShader();
static void EnableShader(GLuint Shader);
}; };
#endif // _PIXELSHADERCACHE_H_ #endif // _PIXELSHADERCACHE_H_

View File

@ -74,14 +74,12 @@ bool ApplyShader()
if (s_shader.glprogid != 0) if (s_shader.glprogid != 0)
{ {
glEnable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::EnableShader(s_shader.glprogid);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_shader.glprogid);
return true; return true;
} }
else else
{ {
glDisable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::DisableShader();
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
return false; return false;
} }
} }

View File

@ -564,8 +564,8 @@ void Renderer::ResetAPIState()
{ {
// Gets us to a reasonably sane state where it's possible to do things like // Gets us to a reasonably sane state where it's possible to do things like
// image copies with textured quads, etc. // image copies with textured quads, etc.
glDisable(GL_VERTEX_PROGRAM_ARB); VertexShaderCache::DisableShader();
glDisable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::DisableShader();
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST); glDisable(GL_DEPTH_TEST);
@ -597,8 +597,8 @@ void Renderer::RestoreAPIState()
SetColorMask(); SetColorMask();
SetBlendMode(true); SetBlendMode(true);
glEnable(GL_VERTEX_PROGRAM_ARB); VertexShaderCache::EnableShader(0);
glEnable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::EnableShader(0);
} }
void Renderer::SetColorMask() void Renderer::SetColorMask()
@ -843,7 +843,6 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{ {
if (s_skipSwap) if (s_skipSwap)
return; return;
const XFBSource* xfbSource = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight); const XFBSource* xfbSource = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight);
if (!xfbSource) if (!xfbSource)
{ {
@ -917,9 +916,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1); glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1); glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1);
glEnd(); glEnd();
PixelShaderCache::DisableShader();;
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
} }
else else
{ {
@ -1070,12 +1067,12 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
// --------------------------------------------------------------------- // ---------------------------------------------------------------------
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
for (int i = 0; i < 8; i++) { /*for (int i = 0; i < 8; i++) {
glActiveTexture(GL_TEXTURE0 + i); glActiveTexture(GL_TEXTURE0 + i);
glDisable(GL_TEXTURE_2D); glDisable(GL_TEXTURE_2D);
glDisable(GL_TEXTURE_RECTANGLE_ARB); glDisable(GL_TEXTURE_RECTANGLE_ARB);
} }
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);*/
DrawDebugText(); DrawDebugText();
@ -1088,7 +1085,6 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
if (blend_enabled) if (blend_enabled)
glEnable(GL_BLEND); glEnable(GL_BLEND);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
#if defined(DVPROFILE) #if defined(DVPROFILE)
if (g_bWriteProfile) { if (g_bWriteProfile) {
//g_bWriteProfile = 0; //g_bWriteProfile = 0;
@ -1107,8 +1103,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
// Clear framebuffer // Clear framebuffer
glClearColor(0, 0, 0, 0); //glClearColor(0, 0, 0, 0);
glClear(GL_COLOR_BUFFER_BIT); //glClear(GL_COLOR_BUFFER_BIT);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();

View File

@ -34,6 +34,8 @@ namespace TextureConverter
static GLuint s_texConvFrameBuffer = 0; static GLuint s_texConvFrameBuffer = 0;
static GLuint s_srcTexture = 0; // for decoding from RAM static GLuint s_srcTexture = 0; // for decoding from RAM
static GLuint s_srcTextureWidth = 0;
static GLuint s_srcTextureHeight = 0;
static GLuint s_dstRenderBuffer = 0; // for encoding to RAM static GLuint s_dstRenderBuffer = 0; // for encoding to RAM
const int renderBufferWidth = 1024; const int renderBufferWidth = 1024;
@ -60,15 +62,12 @@ void CreateRgbToYuyvProgram()
" float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n" " float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n"
" float3 c0 = texRECT(samp0, uv0).rgb;\n" " float3 c0 = texRECT(samp0, uv0).rgb;\n"
" float3 c1 = texRECT(samp0, uv1).rgb;\n" " float3 c1 = texRECT(samp0, uv1).rgb;\n"
" float3 y_const = float3(0.257f,0.504f,0.098f);\n"
" float y0 = (0.257f * c0.r) + (0.504f * c0.g) + (0.098f * c0.b) + 0.0625f;\n" " float3 u_const = float3(-0.148f,-0.291f,0.439f);\n"
" float u0 =-(0.148f * c0.r) - (0.291f * c0.g) + (0.439f * c0.b) + 0.5f;\n" " float3 v_const = float3(0.439f,-0.368f,-0.071f);\n"
" float v0 = (0.439f * c0.r) - (0.368f * c0.g) - (0.071f * c0.b) + 0.5f;\n" " float4 const3 = float4(0.0625f,0.5f,0.0625f,0.5f);\n"
" float y1 = (0.257f * c1.r) + (0.504f * c1.g) + (0.098f * c1.b) + 0.0625f;\n" " float3 c01 = (c0 + c1) * 0.5f;\n"
" float u1 =-(0.148f * c1.r) - (0.291f * c1.g) + (0.439f * c1.b) + 0.5f;\n" " ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
" float v1 = (0.439f * c1.r) - (0.368f * c1.g) - (0.071f * c1.b) + 0.5f;\n"
" ocol0 = float4(y1, (u0 + u1) / 2, y0, (v0 + v1) / 2);\n"
"}\n"; "}\n";
if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) { if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) {
@ -204,8 +203,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar
glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);
glEnable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::EnableShader(shader.glprogid);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader.glprogid);
// Draw... // Draw...
glBegin(GL_QUADS); glBegin(GL_QUADS);
@ -218,7 +216,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar
// .. and then readback the results. // .. and then readback the results.
// TODO: make this less slow. // TODO: make this less slow.
glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destAddr);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
g_framebufferManager.SetFramebuffer(0); g_framebufferManager.SetFramebuffer(0);
@ -332,12 +330,20 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture);
// TODO: make this less slow. (How?) // TODO: make this less slow. (How?)
if(s_srcTextureWidth == (GLsizei)srcFmtWidth && s_srcTextureHeight == (GLsizei)srcHeight)
{
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,0,0,s_srcTextureWidth, s_srcTextureHeight, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
}
else
{
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
s_srcTextureWidth = (GLsizei)srcFmtWidth;
s_srcTextureHeight = (GLsizei)srcHeight;
}
glViewport(0, 0, srcWidth, srcHeight); glViewport(0, 0, srcWidth, srcHeight);
glEnable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::EnableShader(s_yuyvToRgbProgram.glprogid);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_yuyvToRgbProgram.glprogid);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();

View File

@ -513,7 +513,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
{ {
glGenTextures(1, (GLuint *)&entry.texture); glGenTextures(1, (GLuint *)&entry.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
} }
else else
@ -531,7 +531,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
glDeleteTextures(1,(GLuint *)&entry.texture); glDeleteTextures(1,(GLuint *)&entry.texture);
glGenTextures(1, (GLuint *)&entry.texture); glGenTextures(1, (GLuint *)&entry.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
} }
} }
@ -692,8 +692,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
glViewport(0, 0, w, h); glViewport(0, 0, w, h);
glEnable(GL_FRAGMENT_PROGRAM_ARB); PixelShaderCache::EnableShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();

View File

@ -37,8 +37,9 @@
#include "VertexShaderGen.h" #include "VertexShaderGen.h"
#include "VertexLoader.h" #include "VertexLoader.h"
#include "VertexManager.h" #include "VertexManager.h"
#include "IndexGenerator.h"
#define MAX_BUFFER_SIZE 0x4000 #define MAX_BUFFER_SIZE 0x50000
// internal state for loading vertices // internal state for loading vertices
extern NativeVertexFormat *g_nativeVertexFmt; extern NativeVertexFormat *g_nativeVertexFmt;
@ -46,13 +47,17 @@ extern NativeVertexFormat *g_nativeVertexFmt;
namespace VertexManager namespace VertexManager
{ {
static GLuint s_vboBuffers[0x40] = {0}; static const GLenum c_RenderprimitiveType[8] =
static int s_nCurVBOIndex = 0; // current free buffer {
static u8 *s_pBaseBufferPointer = NULL; GL_TRIANGLES,
static std::vector< GLint > s_vertexFirstOffset; GL_ZERO, //nothing
static std::vector< GLsizei > s_vertexGroupSize; GL_TRIANGLES,
static std::vector< std::pair< GLenum, int > > s_vertexGroups; GL_TRIANGLES,
u32 s_vertexCount; GL_TRIANGLES,
GL_LINES,
GL_LINES,
GL_POINTS
};
static const GLenum c_primitiveType[8] = static const GLenum c_primitiveType[8] =
{ {
@ -66,35 +71,48 @@ static const GLenum c_primitiveType[8] =
GL_POINTS GL_POINTS
}; };
static IndexGenerator indexGen;
static GLenum lastPrimitive;
static GLenum CurrentRenderPrimitive;
static u8 *LocalVBuffer;
static u16 *IBuffer;
#define MAXVBUFFERSIZE 0x50000
#define MAXIBUFFERSIZE 0x20000
#define MAXVBOBUFFERCOUNT 0x4
static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0};
static GLuint s_IBuffers[MAXVBOBUFFERCOUNT] = {0};
static int s_nCurVBOIndex = 0; // current free buffer
bool Init() bool Init()
{ {
s_pBaseBufferPointer = (u8*)AllocateMemoryPages(MAX_BUFFER_SIZE); lastPrimitive = GL_ZERO;
s_pCurBufferPointer = s_pBaseBufferPointer; CurrentRenderPrimitive = GL_ZERO;
LocalVBuffer = new u8[MAXVBUFFERSIZE];
IBuffer = new u16[MAXIBUFFERSIZE];
s_pCurBufferPointer = LocalVBuffer;
s_nCurVBOIndex = 0; s_nCurVBOIndex = 0;
glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
for (u32 i = 0; i < ARRAYSIZE(s_vboBuffers); ++i) { for (u32 i = 0; i < ARRAYSIZE(s_vboBuffers); ++i) {
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[i]); glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[i]);
glBufferData(GL_ARRAY_BUFFER, MAX_BUFFER_SIZE, NULL, GL_STREAM_DRAW); glBufferData(GL_ARRAY_BUFFER, MAXVBUFFERSIZE, NULL, GL_STREAM_DRAW);
} }
glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_VERTEX_ARRAY);
g_nativeVertexFmt = NULL; g_nativeVertexFmt = NULL;
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
return true; return true;
} }
void Shutdown() void Shutdown()
{ {
FreeMemoryPages(s_pBaseBufferPointer, MAX_BUFFER_SIZE); s_pBaseBufferPointer = s_pCurBufferPointer = NULL; delete [] LocalVBuffer;
delete [] IBuffer;
glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
memset(s_vboBuffers, 0, sizeof(s_vboBuffers));
s_vertexFirstOffset.resize(0);
s_vertexGroupSize.resize(0);
s_vertexGroups.resize(0);
s_vertexCount = 0;
s_nCurVBOIndex = 0; s_nCurVBOIndex = 0;
ResetBuffer(); ResetBuffer();
} }
@ -102,58 +120,85 @@ void Shutdown()
void ResetBuffer() void ResetBuffer()
{ {
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = s_pBaseBufferPointer; s_pCurBufferPointer = LocalVBuffer;
s_vertexFirstOffset.resize(0); CurrentRenderPrimitive = GL_ZERO;
s_vertexGroupSize.resize(0); u16 *ptr = 0;
s_vertexGroups.resize(0); indexGen.Start((unsigned short*)ptr);
s_vertexCount = 0; }
// Forwards the vertex count to the index generator method that matches the
// given GL primitive type. Primitive values not listed here are ignored.
void AddIndices(int _primitive, int _numVertices)
{
	switch (_primitive)
	{
	case GL_QUADS:
		indexGen.AddQuads(_numVertices);
		break;
	case GL_TRIANGLES:
		indexGen.AddList(_numVertices);
		break;
	case GL_TRIANGLE_STRIP:
		indexGen.AddStrip(_numVertices);
		break;
	case GL_TRIANGLE_FAN:
		indexGen.AddFan(_numVertices);
		break;
	case GL_LINE_STRIP:
		indexGen.AddLineStrip(_numVertices);
		break;
	case GL_LINES:
		indexGen.AddLineList(_numVertices);
		break;
	case GL_POINTS:
		indexGen.AddPoints(_numVertices);
		break;
	default:
		// Unknown primitive type: nothing to add.
		break;
	}
}
int GetRemainingSize() int GetRemainingSize()
{ {
return MAX_BUFFER_SIZE - (int)(s_pCurBufferPointer - s_pBaseBufferPointer); return LocalVBuffer + MAXVBUFFERSIZE - s_pCurBufferPointer;
} }
void AddVertices(int primitive, int numvertices) void AddVertices(int primitive, int numvertices)
{ {
_assert_(numvertices > 0); if (numvertices <= 0)
_assert_(g_nativeVertexFmt != NULL); return;
if (c_primitiveType[primitive] == GL_ZERO)
return;
DVSTARTPROFILE();
lastPrimitive = c_primitiveType[primitive];
ADDSTAT(stats.thisFrame.numPrims, numvertices); ADDSTAT(stats.thisFrame.numPrims, numvertices);
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) {
// We can join primitives for free here. Not likely to help much, though, but whatever... if (CurrentRenderPrimitive != c_RenderprimitiveType[primitive])
if (c_primitiveType[primitive] == GL_TRIANGLES || {
c_primitiveType[primitive] == GL_LINES || // We are NOT collecting the right type.
c_primitiveType[primitive] == GL_POINTS || Flush();
c_primitiveType[primitive] == GL_QUADS) { CurrentRenderPrimitive = c_RenderprimitiveType[primitive];
u16 *ptr = 0;
if (lastPrimitive != GL_POINTS)
{
ptr = IBuffer;
}
indexGen.Start((unsigned short*)ptr);
AddIndices(c_primitiveType[primitive], numvertices);
}
else // We are collecting the right type, keep going
{
INCSTAT(stats.thisFrame.numPrimitiveJoins); INCSTAT(stats.thisFrame.numPrimitiveJoins);
// Easy join AddIndices(c_primitiveType[primitive], numvertices);
s_vertexGroupSize.back() += numvertices;
s_vertexCount += numvertices;
return;
} }
} }
s_vertexFirstOffset.push_back(s_vertexCount); inline void Draw(int numVertices, int indexLen)
s_vertexGroupSize.push_back(numvertices); {
s_vertexCount += numvertices;
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) if (CurrentRenderPrimitive != GL_POINT)
s_vertexGroups.back().second++; {
glDrawElements(CurrentRenderPrimitive, indexLen, GL_UNSIGNED_SHORT, IBuffer);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
else else
s_vertexGroups.push_back(std::make_pair(c_primitiveType[primitive], 1)); {
glDrawArrays(CurrentRenderPrimitive,0,numVertices);
INCSTAT(stats.thisFrame.numDrawCalls);
}
#if defined(_DEBUG) || defined(DEBUGFAST)
static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"};
PRIM_LOG("prim: %s, c=%d", sprims[primitive], numvertices);
#endif
} }
void Flush() void Flush()
{ {
if (s_vertexCount == 0) if (LocalVBuffer == s_pCurBufferPointer) return;
return; int numVerts = indexGen.GetNumVerts();
if(numVerts == 0) return;
_assert_(s_pCurBufferPointer != s_pBaseBufferPointer);
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens, PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
@ -188,8 +233,9 @@ void Flush()
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW); glBufferSubData(GL_ARRAY_BUFFER,0, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
// setup the pointers // setup the pointers
@ -266,49 +312,30 @@ void Flush()
// finally bind // finally bind
// TODO - cache progid, check if same as before. Maybe GL does this internally, though. int groupStart = 0;
// This is the really annoying problem with GL - you never know whether it's worth caching stuff yourself. if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid);
if (vs) glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs->glprogid); if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here.
if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid); // Lego Star Wars crashes here.
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG(""); PRIM_LOG("");
#endif #endif
int groupStart = 0; int numIndexes = indexGen.GetindexLen();
for (unsigned i = 0; i < s_vertexGroups.size(); i++) Draw(numVerts,numIndexes);
{
INCSTAT(stats.thisFrame.numDrawCalls);
glMultiDrawArrays(s_vertexGroups[i].first,
&s_vertexFirstOffset[groupStart],
&s_vertexGroupSize[groupStart],
s_vertexGroups[i].second);
groupStart += s_vertexGroups[i].second;
}
// run through vertex groups again to set alpha // run through vertex groups again to set alpha
if (!g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) if (!g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
{ {
ps = PixelShaderCache::GetShader(true); ps = PixelShaderCache::GetShader(true);
if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid); if (ps)PixelShaderCache::SetCurrentShader(ps->glprogid);
// only update alpha // only update alpha
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
glDisable(GL_BLEND); glDisable(GL_BLEND);
groupStart = 0; Draw(numVerts,numIndexes);
for (unsigned i = 0; i < s_vertexGroups.size(); i++)
{
INCSTAT(stats.thisFrame.numDrawCalls);
glMultiDrawArrays(s_vertexGroups[i].first,
&s_vertexFirstOffset[groupStart],
&s_vertexGroupSize[groupStart],
s_vertexGroups[i].second);
groupStart += s_vertexGroups[i].second;
}
// restore color mask // restore color mask
Renderer::SetColorMask(); Renderer::SetColorMask();
@ -342,5 +369,5 @@ void Flush()
ResetBuffer(); ResetBuffer();
} }
} // namespace } // namespace

View File

@ -28,7 +28,7 @@ namespace VertexManager
bool Init(); bool Init();
void Shutdown(); void Shutdown();
void AddIndices(int _primitive, int _numVertices);
void ResetBuffer(); void ResetBuffer();
}; };

View File

@ -38,6 +38,8 @@
VertexShaderCache::VSCache VertexShaderCache::vshaders; VertexShaderCache::VSCache VertexShaderCache::vshaders;
bool VertexShaderCache::s_displayCompileAlert; bool VertexShaderCache::s_displayCompileAlert;
GLuint VertexShaderCache::CurrentShader;
bool VertexShaderCache::ShaderEnabled;
static VERTEXSHADER *pShaderLast = NULL; static VERTEXSHADER *pShaderLast = NULL;
static int s_nMaxVertexInstructions; static int s_nMaxVertexInstructions;
@ -119,6 +121,9 @@ void VertexShaderCache::Init()
s_displayCompileAlert = true; s_displayCompileAlert = true;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions); glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions);
ShaderEnabled = false;
CurrentShader = 0;
EnableShader(0);
} }
void VertexShaderCache::Shutdown() void VertexShaderCache::Shutdown()
@ -243,9 +248,10 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
plocal = strstr(plocal + 13, "program.local"); plocal = strstr(plocal + 13, "program.local");
} }
glGenProgramsARB(1, &vs.glprogid); glGenProgramsARB(1, &vs.glprogid);
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid); EnableShader(vs.glprogid);
//glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid);
//CurrentShader = vs.glprogid;
glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
err = GL_REPORT_ERROR(); err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR) { if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, pstrprogram); ERROR_LOG(VIDEO, pstrprogram);
@ -260,3 +266,39 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
return true; return true;
} }
// Turns vertex programs off: unbinds the current program, disables the
// vertex program target and clears the cached state. Does nothing when the
// cache already records vertex programs as disabled.
void VertexShaderCache::DisableShader()
{
	if (!ShaderEnabled)
		return;

	CurrentShader = 0;
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, 0);
	glDisable(GL_VERTEX_PROGRAM_ARB);
	ShaderEnabled = false;
}
// Binds Shader as the current vertex program. The call is a no-op when
// vertex programs are disabled or when Shader is already the cached
// current program.
void VertexShaderCache::SetCurrentShader(GLuint Shader)
{
	if (!ShaderEnabled || CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, Shader);
}
// Makes sure vertex programs are enabled, then binds Shader unless the
// cache already records it as the bound program.
void VertexShaderCache::EnableShader(GLuint Shader)
{
	if (!ShaderEnabled)
	{
		glEnable(GL_VERTEX_PROGRAM_ARB);
		// Reset the cached id so any non-zero Shader is rebound below.
		CurrentShader = 0;
		ShaderEnabled = true;
	}

	if (CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, Shader);
}

View File

@ -54,6 +54,9 @@ class VertexShaderCache
static bool s_displayCompileAlert; static bool s_displayCompileAlert;
static GLuint CurrentShader;
static bool ShaderEnabled;
public: public:
static void Init(); static void Init();
static void ProgressiveCleanup(); static void ProgressiveCleanup();
@ -61,6 +64,11 @@ public:
static VERTEXSHADER* GetShader(u32 components); static VERTEXSHADER* GetShader(u32 components);
static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram); static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram);
static void SetCurrentShader(GLuint Shader);
static void DisableShader();
static void EnableShader(GLuint Shader);
}; };
#endif // _VERTEXSHADERCACHE_H_ #endif // _VERTEXSHADERCACHE_H_

View File

@ -513,6 +513,7 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
// Make sure previous swap request has made it to the screen // Make sure previous swap request has made it to the screen
if (g_VideoInitialize.bUseDualCore) if (g_VideoInitialize.bUseDualCore)
{ {
while (Common::AtomicLoadAcquire(s_swapRequested)) while (Common::AtomicLoadAcquire(s_swapRequested))
Common::YieldCPU(); Common::YieldCPU();
} }