OpenGL: commit rodolfoosvaldobogado's (what a name!) speedup patches.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4322 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-09-26 12:39:12 +00:00
parent 5a7fbd63c2
commit 29808cdde2
16 changed files with 295 additions and 150 deletions

View File

@ -32,6 +32,7 @@ void IndexGenerator::Start(unsigned short *startptr)
index = 0;
numPrims = 0;
adds = 0;
indexLen = 0;
onlyLists = true;
}
@ -45,6 +46,7 @@ void IndexGenerator::AddList(int numVerts)
*ptr++ = index+i*3+1;
*ptr++ = index+i*3+2;
}
indexLen += numVerts;
index += numVerts;
numPrims += numTris;
adds++;
@ -62,6 +64,7 @@ void IndexGenerator::AddStrip(int numVerts)
*ptr++ = index+i+(wind?1:2);
wind = !wind;
}
indexLen += numTris * 3;
index += numVerts;
numPrims += numTris;
adds++;
@ -77,6 +80,7 @@ void IndexGenerator::AddLineList(int numVerts)
*ptr++ = index+i*2;
*ptr++ = index+i*2+1;
}
indexLen += numVerts;
index += numVerts;
numPrims += numLines;
adds++;
@ -91,6 +95,7 @@ void IndexGenerator::AddLineStrip(int numVerts)
*ptr++ = index+i;
*ptr++ = index+i+1;
}
indexLen += numLines * 2;
index += numVerts;
numPrims += numLines;
adds++;
@ -107,6 +112,7 @@ void IndexGenerator::AddFan(int numVerts)
*ptr++ = index+i+1;
*ptr++ = index+i+2;
}
indexLen += numTris * 3;
index += numVerts;
numPrims += numTris;
adds++;
@ -126,6 +132,7 @@ void IndexGenerator::AddQuads(int numVerts)
*ptr++ = index+i*4+2;
*ptr++ = index+i*4+3;
}
indexLen += numTris * 3;
index += numVerts;
numPrims += numTris;
adds++;

View File

@ -35,12 +35,14 @@ public:
int GetNumPrims() {return numPrims;} // number of primitives accumulated by the Add* calls since Start()
int GetNumVerts() {return index;} // running vertex count (index advances by numVerts on every Add* call), not the primitive count
int GetNumAdds() {return adds;} // number of Add* calls made since Start()
int GetindexLen() {return indexLen;} // running index count accumulated by the Add* calls since Start()
bool GetOnlyLists() {return onlyLists;} // onlyLists flag; Start() sets it to true (NOTE(review): where it is cleared is not visible here)
private:
unsigned short *ptr;
int numPrims;
int index;
int adds;
int indexLen;
bool onlyLists;
};

View File

@ -602,7 +602,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{
VertexManager::Flush();
//Don't flush here we can join some primitives, let the vertex manager do this work
//VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;

View File

@ -122,11 +122,12 @@ void VertexShaderManager::SetConstants()
for (int i = istart; i < iend; ++i)
{
u32 color = *(const u32*)(xfmemptr + 3);
float NormalizationCoef = 1 / 255.0f;
SetVSConstant4f(C_LIGHTS + 5 * i,
((color >> 24) & 0xFF) / 255.0f,
((color >> 16) & 0xFF) / 255.0f,
((color >> 8) & 0xFF) / 255.0f,
((color) & 0xFF) / 255.0f);
((color >> 24) & 0xFF) * NormalizationCoef,
((color >> 16) & 0xFF) * NormalizationCoef,
((color >> 8) & 0xFF) * NormalizationCoef,
((color) & 0xFF) * NormalizationCoef);
xfmemptr += 4;
for (int j = 0; j < 4; ++j, xfmemptr += 3)
@ -466,11 +467,11 @@ void VertexShaderManager::SetMaterialColor(int index, u32 data)
int ind = index * 4;
nMaterialsChanged |= (1 << index);
s_fMaterials[ind++] = ((data >> 24) & 0xFF) / 255.0f;
s_fMaterials[ind++] = ((data >> 16) & 0xFF) / 255.0f;
s_fMaterials[ind++] = ((data >> 8) & 0xFF) / 255.0f;
s_fMaterials[ind] = ( data & 0xFF) / 255.0f;
float NormalizationCoef = 1 / 255.0f;
s_fMaterials[ind++] = ((data >> 24) & 0xFF) * NormalizationCoef;
s_fMaterials[ind++] = ((data >> 16) & 0xFF) * NormalizationCoef;
s_fMaterials[ind++] = ((data >> 8) & 0xFF) * NormalizationCoef;
s_fMaterials[ind] = ( data & 0xFF) * NormalizationCoef;
}
void VertexShaderManager::TranslateView(float x, float y)

View File

@ -58,7 +58,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples
m_efbDepth = glObj[1];
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbColor);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbDepth);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
@ -119,7 +119,7 @@ void FramebufferManager::Init(int targetWidth, int targetHeight, int msaaSamples
m_resolvedDepthTexture = glObj[1];
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
@ -343,7 +343,7 @@ void FramebufferManager::copyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight
// the image will be allocated by glCopyTexImage2D (later).
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, m_targetWidth, m_targetHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
}
@ -431,7 +431,7 @@ const XFBSource* FramebufferManager::getRealXFBSource(u32 xfbAddr, u32 fbWidth,
glGenTextures(1, &m_realXFBSource.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_realXFBSource.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, MAX_XFB_WIDTH, MAX_XFB_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
}

View File

@ -40,6 +40,8 @@ static GLuint s_DepthMatrixProgram = 0;
PixelShaderCache::PSCache PixelShaderCache::pshaders;
PIXELSHADERUID PixelShaderCache::s_curuid;
bool PixelShaderCache::s_displayCompileAlert;
GLuint PixelShaderCache::CurrentShader;
bool PixelShaderCache::ShaderEnabled;
static FRAGMENTSHADER* pShaderLast = NULL;
static float lastPSconstants[C_COLORMATRIX+16][4];
@ -138,6 +140,9 @@ void PixelShaderCache::Init()
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
}
CurrentShader=0;
ShaderEnabled = false;
EnableShader(s_DepthMatrixProgram);
}
void PixelShaderCache::Shutdown()
@ -280,7 +285,9 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
}
glGenProgramsARB(1, &ps.glprogid);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid);
EnableShader(ps.glprogid);
//glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid);
//CurrentShader = ps.glprogid;
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
err = GL_REPORT_ERROR();
@ -312,3 +319,43 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
#endif
return true;
}
// Turns fragment programs off and forgets the cached program id.
// The cached id is cleared unconditionally; the GL calls are only issued
// while the enabled flag is still set.
void PixelShaderCache::DisableShader()
{
	CurrentShader = 0;
	if (!ShaderEnabled)
		return;

	// Unbind (program 0) before disabling the fragment program target.
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
	glDisable(GL_FRAGMENT_PROGRAM_ARB);
	ShaderEnabled = false;
}
// Binds Shader as the active fragment program and records it as current.
// Does nothing while fragment programs are disabled.
void PixelShaderCache::SetCurrentShader(GLuint Shader)
{
	if (!ShaderEnabled)
		return;

	// The "skip if already bound" check (CurrentShader != Shader) is
	// intentionally left out: caching here breaks Super Mario Sunshine and
	// the reason has not been figured out yet.
	CurrentShader = Shader;
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, Shader);
}
// Makes sure fragment programs are enabled, then binds Shader unless the
// cached id says it is already the current program.
void PixelShaderCache::EnableShader(GLuint Shader)
{
	if (!ShaderEnabled)
	{
		glEnable(GL_FRAGMENT_PROGRAM_ARB);
		ShaderEnabled = true;
		// Treat the freshly enabled target as having program 0 bound.
		CurrentShader = 0;
	}

	if (CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, Shader);
}

View File

@ -63,6 +63,10 @@ class PixelShaderCache
static bool s_displayCompileAlert;
static GLuint CurrentShader;
static bool ShaderEnabled;
public:
static void Init();
static void ProgressiveCleanup();
@ -74,6 +78,12 @@ public:
static GLuint GetColorMatrixProgram();
static GLuint GetDepthMatrixProgram();
static void SetCurrentShader(GLuint Shader);
static void DisableShader();
static void EnableShader(GLuint Shader);
};
#endif // _PIXELSHADERCACHE_H_

View File

@ -74,14 +74,12 @@ bool ApplyShader()
if (s_shader.glprogid != 0)
{
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_shader.glprogid);
PixelShaderCache::EnableShader(s_shader.glprogid);
return true;
}
else
{
glDisable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
PixelShaderCache::DisableShader();
return false;
}
}

View File

@ -564,8 +564,8 @@ void Renderer::ResetAPIState()
{
// Gets us to a reasonably sane state where it's possible to do things like
// image copies with textured quads, etc.
glDisable(GL_VERTEX_PROGRAM_ARB);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
VertexShaderCache::DisableShader();
PixelShaderCache::DisableShader();
glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST);
@ -597,8 +597,8 @@ void Renderer::RestoreAPIState()
SetColorMask();
SetBlendMode(true);
glEnable(GL_VERTEX_PROGRAM_ARB);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
VertexShaderCache::EnableShader(0);
PixelShaderCache::EnableShader(0);
}
void Renderer::SetColorMask()
@ -843,7 +843,6 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{
if (s_skipSwap)
return;
const XFBSource* xfbSource = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight);
if (!xfbSource)
{
@ -917,9 +916,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1);
glEnd();
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
PixelShaderCache::DisableShader();;
}
else
{
@ -1070,12 +1067,12 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
// ---------------------------------------------------------------------
GL_REPORT_ERRORD();
for (int i = 0; i < 8; i++) {
/*for (int i = 0; i < 8; i++) {
glActiveTexture(GL_TEXTURE0 + i);
glDisable(GL_TEXTURE_2D);
glDisable(GL_TEXTURE_RECTANGLE_ARB);
}
glActiveTexture(GL_TEXTURE0);
glActiveTexture(GL_TEXTURE0);*/
DrawDebugText();
@ -1087,8 +1084,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
OSD::DrawMessages();
if (blend_enabled)
glEnable(GL_BLEND);
GL_REPORT_ERRORD();
GL_REPORT_ERRORD();
#if defined(DVPROFILE)
if (g_bWriteProfile) {
//g_bWriteProfile = 0;
@ -1107,8 +1103,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
GL_REPORT_ERRORD();
// Clear framebuffer
glClearColor(0, 0, 0, 0);
glClear(GL_COLOR_BUFFER_BIT);
//glClearColor(0, 0, 0, 0);
//glClear(GL_COLOR_BUFFER_BIT);
GL_REPORT_ERRORD();

View File

@ -34,6 +34,8 @@ namespace TextureConverter
static GLuint s_texConvFrameBuffer = 0;
static GLuint s_srcTexture = 0; // for decoding from RAM
static GLuint s_srcTextureWidth = 0;
static GLuint s_srcTextureHeight = 0;
static GLuint s_dstRenderBuffer = 0; // for encoding to RAM
const int renderBufferWidth = 1024;
@ -60,15 +62,12 @@ void CreateRgbToYuyvProgram()
" float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n"
" float3 c0 = texRECT(samp0, uv0).rgb;\n"
" float3 c1 = texRECT(samp0, uv1).rgb;\n"
" float y0 = (0.257f * c0.r) + (0.504f * c0.g) + (0.098f * c0.b) + 0.0625f;\n"
" float u0 =-(0.148f * c0.r) - (0.291f * c0.g) + (0.439f * c0.b) + 0.5f;\n"
" float v0 = (0.439f * c0.r) - (0.368f * c0.g) - (0.071f * c0.b) + 0.5f;\n"
" float y1 = (0.257f * c1.r) + (0.504f * c1.g) + (0.098f * c1.b) + 0.0625f;\n"
" float u1 =-(0.148f * c1.r) - (0.291f * c1.g) + (0.439f * c1.b) + 0.5f;\n"
" float v1 = (0.439f * c1.r) - (0.368f * c1.g) - (0.071f * c1.b) + 0.5f;\n"
" ocol0 = float4(y1, (u0 + u1) / 2, y0, (v0 + v1) / 2);\n"
" float3 y_const = float3(0.257f,0.504f,0.098f);\n"
" float3 u_const = float3(-0.148f,-0.291f,0.439f);\n"
" float3 v_const = float3(0.439f,-0.368f,-0.071f);\n"
" float4 const3 = float4(0.0625f,0.5f,0.0625f,0.5f);\n"
" float3 c01 = (c0 + c1) * 0.5f;\n"
" ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
"}\n";
if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) {
@ -204,8 +203,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar
glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader.glprogid);
PixelShaderCache::EnableShader(shader.glprogid);
// Draw...
glBegin(GL_QUADS);
@ -218,7 +216,7 @@ void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const Tar
// .. and then readback the results.
// TODO: make this less slow.
glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destAddr);
GL_REPORT_ERRORD();
g_framebufferManager.SetFramebuffer(0);
@ -332,13 +330,21 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture);
// TODO: make this less slow. (How?)
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
if(s_srcTextureWidth == (GLsizei)srcFmtWidth && s_srcTextureHeight == (GLsizei)srcHeight)
{
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,0,0,s_srcTextureWidth, s_srcTextureHeight, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
}
else
{
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
s_srcTextureWidth = (GLsizei)srcFmtWidth;
s_srcTextureHeight = (GLsizei)srcHeight;
}
glViewport(0, 0, srcWidth, srcHeight);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_yuyvToRgbProgram.glprogid);
PixelShaderCache::EnableShader(s_yuyvToRgbProgram.glprogid);
GL_REPORT_ERRORD();
glBegin(GL_QUADS);

View File

@ -513,7 +513,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
{
glGenTextures(1, (GLuint *)&entry.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
GL_REPORT_ERRORD();
}
else
@ -531,7 +531,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
glDeleteTextures(1,(GLuint *)&entry.texture);
glGenTextures(1, (GLuint *)&entry.texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, entry.texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
GL_REPORT_ERRORD();
}
}
@ -692,8 +692,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
glViewport(0, 0, w, h);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
PixelShaderCache::EnableShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
GL_REPORT_ERRORD();

View File

@ -37,8 +37,9 @@
#include "VertexShaderGen.h"
#include "VertexLoader.h"
#include "VertexManager.h"
#include "IndexGenerator.h"
#define MAX_BUFFER_SIZE 0x4000
#define MAX_BUFFER_SIZE 0x50000
// internal state for loading vertices
extern NativeVertexFormat *g_nativeVertexFmt;
@ -46,13 +47,17 @@ extern NativeVertexFormat *g_nativeVertexFmt;
namespace VertexManager
{
static GLuint s_vboBuffers[0x40] = {0};
static int s_nCurVBOIndex = 0; // current free buffer
static u8 *s_pBaseBufferPointer = NULL;
static std::vector< GLint > s_vertexFirstOffset;
static std::vector< GLsizei > s_vertexGroupSize;
static std::vector< std::pair< GLenum, int > > s_vertexGroups;
u32 s_vertexCount;
static const GLenum c_RenderprimitiveType[8] =
{
GL_TRIANGLES,
GL_ZERO, //nothing
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_LINES,
GL_LINES,
GL_POINTS
};
static const GLenum c_primitiveType[8] =
{
@ -66,35 +71,48 @@ static const GLenum c_primitiveType[8] =
GL_POINTS
};
static IndexGenerator indexGen;
static GLenum lastPrimitive;
static GLenum CurrentRenderPrimitive;
static u8 *LocalVBuffer;
static u16 *IBuffer;
#define MAXVBUFFERSIZE 0x50000
#define MAXIBUFFERSIZE 0x20000
#define MAXVBOBUFFERCOUNT 0x4
static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0};
static GLuint s_IBuffers[MAXVBOBUFFERCOUNT] = {0};
static int s_nCurVBOIndex = 0; // current free buffer
bool Init()
{
s_pBaseBufferPointer = (u8*)AllocateMemoryPages(MAX_BUFFER_SIZE);
s_pCurBufferPointer = s_pBaseBufferPointer;
lastPrimitive = GL_ZERO;
CurrentRenderPrimitive = GL_ZERO;
LocalVBuffer = new u8[MAXVBUFFERSIZE];
IBuffer = new u16[MAXIBUFFERSIZE];
s_pCurBufferPointer = LocalVBuffer;
s_nCurVBOIndex = 0;
glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
for (u32 i = 0; i < ARRAYSIZE(s_vboBuffers); ++i) {
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[i]);
glBufferData(GL_ARRAY_BUFFER, MAX_BUFFER_SIZE, NULL, GL_STREAM_DRAW);
glBufferData(GL_ARRAY_BUFFER, MAXVBUFFERSIZE, NULL, GL_STREAM_DRAW);
}
glEnableClientState(GL_VERTEX_ARRAY);
g_nativeVertexFmt = NULL;
GL_REPORT_ERRORD();
return true;
}
void Shutdown()
{
FreeMemoryPages(s_pBaseBufferPointer, MAX_BUFFER_SIZE); s_pBaseBufferPointer = s_pCurBufferPointer = NULL;
delete [] LocalVBuffer;
delete [] IBuffer;
glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
memset(s_vboBuffers, 0, sizeof(s_vboBuffers));
s_vertexFirstOffset.resize(0);
s_vertexGroupSize.resize(0);
s_vertexGroups.resize(0);
s_vertexCount = 0;
s_nCurVBOIndex = 0;
ResetBuffer();
}
@ -102,58 +120,85 @@ void Shutdown()
void ResetBuffer()
{
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = s_pBaseBufferPointer;
s_vertexFirstOffset.resize(0);
s_vertexGroupSize.resize(0);
s_vertexGroups.resize(0);
s_vertexCount = 0;
s_pCurBufferPointer = LocalVBuffer;
CurrentRenderPrimitive = GL_ZERO;
u16 *ptr = 0;
indexGen.Start((unsigned short*)ptr);
}
// Forwards a batch of _numVertices vertices to the index generator method
// matching the GL primitive type _primitive. Unlisted types are ignored.
void AddIndices(int _primitive, int _numVertices)
{
	switch (_primitive)
	{
	case GL_QUADS:
		indexGen.AddQuads(_numVertices);
		break;
	case GL_TRIANGLES:
		indexGen.AddList(_numVertices);
		break;
	case GL_TRIANGLE_STRIP:
		indexGen.AddStrip(_numVertices);
		break;
	case GL_TRIANGLE_FAN:
		indexGen.AddFan(_numVertices);
		break;
	case GL_LINE_STRIP:
		indexGen.AddLineStrip(_numVertices);
		break;
	case GL_LINES:
		indexGen.AddLineList(_numVertices);
		break;
	case GL_POINTS:
		indexGen.AddPoints(_numVertices);
		break;
	default:
		break;
	}
}
int GetRemainingSize()
{
return MAX_BUFFER_SIZE - (int)(s_pCurBufferPointer - s_pBaseBufferPointer);
return LocalVBuffer + MAXVBUFFERSIZE - s_pCurBufferPointer;
}
void AddVertices(int primitive, int numvertices)
{
_assert_(numvertices > 0);
_assert_(g_nativeVertexFmt != NULL);
if (numvertices <= 0)
return;
if (c_primitiveType[primitive] == GL_ZERO)
return;
DVSTARTPROFILE();
lastPrimitive = c_primitiveType[primitive];
ADDSTAT(stats.thisFrame.numPrims, numvertices);
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) {
// We can join primitives for free here. Not likely to help much, though, but whatever...
if (c_primitiveType[primitive] == GL_TRIANGLES ||
c_primitiveType[primitive] == GL_LINES ||
c_primitiveType[primitive] == GL_POINTS ||
c_primitiveType[primitive] == GL_QUADS) {
INCSTAT(stats.thisFrame.numPrimitiveJoins);
// Easy join
s_vertexGroupSize.back() += numvertices;
s_vertexCount += numvertices;
return;
if (CurrentRenderPrimitive != c_RenderprimitiveType[primitive])
{
// We are NOT collecting the right type.
Flush();
CurrentRenderPrimitive = c_RenderprimitiveType[primitive];
u16 *ptr = 0;
if (lastPrimitive != GL_POINTS)
{
ptr = IBuffer;
}
indexGen.Start((unsigned short*)ptr);
AddIndices(c_primitiveType[primitive], numvertices);
}
else // We are collecting the right type, keep going
{
INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(c_primitiveType[primitive], numvertices);
}
}
s_vertexFirstOffset.push_back(s_vertexCount);
s_vertexGroupSize.push_back(numvertices);
s_vertexCount += numvertices;
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive])
s_vertexGroups.back().second++;
inline void Draw(int numVertices, int indexLen)
{
if (CurrentRenderPrimitive != GL_POINT)
{
glDrawElements(CurrentRenderPrimitive, indexLen, GL_UNSIGNED_SHORT, IBuffer);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
else
s_vertexGroups.push_back(std::make_pair(c_primitiveType[primitive], 1));
#if defined(_DEBUG) || defined(DEBUGFAST)
static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"};
PRIM_LOG("prim: %s, c=%d", sprims[primitive], numvertices);
#endif
{
glDrawArrays(CurrentRenderPrimitive,0,numVertices);
INCSTAT(stats.thisFrame.numDrawCalls);
}
}
void Flush()
{
if (s_vertexCount == 0)
return;
_assert_(s_pCurBufferPointer != s_pBaseBufferPointer);
if (LocalVBuffer == s_pCurBufferPointer) return;
int numVerts = indexGen.GetNumVerts();
if(numVerts == 0) return;
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
@ -187,9 +232,10 @@ void Flush()
GL_REPORT_ERRORD();
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER,0, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer);
GL_REPORT_ERRORD();
// setup the pointers
@ -266,54 +312,35 @@ void Flush()
// finally bind
// TODO - cache progid, check if same as before. Maybe GL does this internally, though.
// This is the really annoying problem with GL - you never know whether it's worth caching stuff yourself.
if (vs) glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs->glprogid);
if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid); // Lego Star Wars crashes here.
int groupStart = 0;
if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid);
if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here.
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("");
#endif
int groupStart = 0;
for (unsigned i = 0; i < s_vertexGroups.size(); i++)
{
INCSTAT(stats.thisFrame.numDrawCalls);
glMultiDrawArrays(s_vertexGroups[i].first,
&s_vertexFirstOffset[groupStart],
&s_vertexGroupSize[groupStart],
s_vertexGroups[i].second);
groupStart += s_vertexGroups[i].second;
}
int numIndexes = indexGen.GetindexLen();
Draw(numVerts,numIndexes);
// run through vertex groups again to set alpha
if (!g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
{
ps = PixelShaderCache::GetShader(true);
if (ps) glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps->glprogid);
if (ps)PixelShaderCache::SetCurrentShader(ps->glprogid);
// only update alpha
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
// only update alpha
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
glDisable(GL_BLEND);
glDisable(GL_BLEND);
groupStart = 0;
for (unsigned i = 0; i < s_vertexGroups.size(); i++)
{
INCSTAT(stats.thisFrame.numDrawCalls);
glMultiDrawArrays(s_vertexGroups[i].first,
&s_vertexFirstOffset[groupStart],
&s_vertexGroupSize[groupStart],
s_vertexGroups[i].second);
groupStart += s_vertexGroups[i].second;
}
Draw(numVerts,numIndexes);
// restore color mask
Renderer::SetColorMask();
// restore color mask
Renderer::SetColorMask();
if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
glEnable(GL_BLEND);
if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
glEnable(GL_BLEND);
}
#if defined(_DEBUG) || defined(DEBUGFAST)
@ -342,5 +369,5 @@ void Flush()
ResetBuffer();
}
} // namespace

View File

@ -28,7 +28,7 @@ namespace VertexManager
bool Init();
void Shutdown();
void AddIndices(int _primitive, int _numVertices);
void ResetBuffer();
};

View File

@ -38,6 +38,8 @@
VertexShaderCache::VSCache VertexShaderCache::vshaders;
bool VertexShaderCache::s_displayCompileAlert;
GLuint VertexShaderCache::CurrentShader;
bool VertexShaderCache::ShaderEnabled;
static VERTEXSHADER *pShaderLast = NULL;
static int s_nMaxVertexInstructions;
@ -119,6 +121,9 @@ void VertexShaderCache::Init()
s_displayCompileAlert = true;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions);
ShaderEnabled = false;
CurrentShader = 0;
EnableShader(0);
}
void VertexShaderCache::Shutdown()
@ -243,9 +248,10 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
plocal = strstr(plocal + 13, "program.local");
}
glGenProgramsARB(1, &vs.glprogid);
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid);
glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
EnableShader(vs.glprogid);
//glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid);
//CurrentShader = vs.glprogid;
glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, pstrprogram);
@ -257,6 +263,42 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
#if defined(_DEBUG) || defined(DEBUGFAST)
vs.strprog = pstrprogram;
#endif
return true;
}
// Turns vertex programs off. When they are already off this is a no-op and
// the cached program id is left untouched.
void VertexShaderCache::DisableShader()
{
	if (!ShaderEnabled)
		return;

	CurrentShader = 0;
	// Unbind (program 0) before disabling the vertex program target.
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, 0);
	glDisable(GL_VERTEX_PROGRAM_ARB);
	ShaderEnabled = false;
}
// Binds Shader as the active vertex program, skipping the GL call when
// vertex programs are disabled or Shader is already the cached program.
void VertexShaderCache::SetCurrentShader(GLuint Shader)
{
	if (!ShaderEnabled || CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, Shader);
}
// Makes sure vertex programs are enabled, then binds Shader unless the
// cached id says it is already the current program.
void VertexShaderCache::EnableShader(GLuint Shader)
{
	if (!ShaderEnabled)
	{
		glEnable(GL_VERTEX_PROGRAM_ARB);
		ShaderEnabled = true;
		// Treat the freshly enabled target as having program 0 bound.
		CurrentShader = 0;
	}

	if (CurrentShader == Shader)
		return;

	CurrentShader = Shader;
	glBindProgramARB(GL_VERTEX_PROGRAM_ARB, Shader);
}

View File

@ -53,6 +53,9 @@ class VertexShaderCache
static VSCache vshaders;
static bool s_displayCompileAlert;
static GLuint CurrentShader;
static bool ShaderEnabled;
public:
static void Init();
@ -61,6 +64,11 @@ public:
static VERTEXSHADER* GetShader(u32 components);
static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram);
static void SetCurrentShader(GLuint Shader);
static void DisableShader();
static void EnableShader(GLuint Shader);
};
#endif // _VERTEXSHADERCACHE_H_

View File

@ -513,6 +513,7 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
// Make sure previous swap request has made it to the screen
if (g_VideoInitialize.bUseDualCore)
{
while (Common::AtomicLoadAcquire(s_swapRequested))
Common::YieldCPU();
}