D3D supports setting multiple shader constants at once, so let's add support for that; it gives a very tiny speedup. Also remove the annoying black window in the background when configuring D3D before starting a game. Also make sure to write all values when converting normals - this might help the CPU's write-gather cache.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4255 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-09-12 15:00:08 +00:00
parent e31cc7d1fe
commit 52ea8a0fd1
7 changed files with 118 additions and 89 deletions

View File

@ -116,7 +116,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)DataReadU8()+0.5f) / 127.5f;
}
@ -126,6 +126,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16()
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
@ -149,7 +150,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
@ -161,6 +162,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
@ -188,7 +190,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
*VertexManager::s_pCurBufferPointer++ = pData[0];
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)Memory_Read_U8(iAddress)+0.5f) / 127.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(signed char)Memory_Read_U8(iAddress+1)+0.5f) / 127.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed char)Memory_Read_U8(iAddress+2)+0.5f) / 127.5f;
@ -203,6 +205,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
@ -227,7 +230,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
*VertexManager::s_pCurBufferPointer++ = pData[3 * i];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 1];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 2];
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
@ -241,6 +244,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[3 * i]);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[3 * i + 1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[3 * i + 2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
@ -269,7 +273,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
*VertexManager::s_pCurBufferPointer++ = pData[0];
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
*VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
@ -283,6 +287,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
@ -348,7 +353,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
*VertexManager::s_pCurBufferPointer++ = pData[3 * i];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 1];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 2];
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
@ -363,6 +368,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[3 * i]);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[3 * i + 1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[3 * i + 2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
@ -392,7 +398,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
*VertexManager::s_pCurBufferPointer++ = pData[0];
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
VertexManager::s_pCurBufferPointer++;
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
@ -406,6 +412,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}

View File

@ -95,19 +95,16 @@ void VertexShaderManager::SetConstants()
int startn = nTransformMatricesChanged[0] / 4;
int endn = (nTransformMatricesChanged[1] + 3) / 4;
const float* pstart = (const float*)&xfmem[startn * 4];
for(int i = startn; i < endn; ++i, pstart += 4)
SetVSConstant4fv(C_TRANSFORMMATRICES + i, pstart);
SetMultiVSConstant4fv(C_TRANSFORMMATRICES + startn, endn - startn, pstart);
nTransformMatricesChanged[0] = nTransformMatricesChanged[1] = -1;
}
if (nNormalMatricesChanged[0] >= 0)
{
int startn = nNormalMatricesChanged[0] / 3;
int endn = (nNormalMatricesChanged[1] + 2) / 3;
const float* pnstart = (const float*)&xfmem[XFMEM_NORMALMATRICES+3*startn];
for(int i = startn; i < endn; ++i, pnstart += 3)
SetVSConstant4fv(C_NORMALMATRICES + i, pnstart);
const float *pnstart = (const float*)&xfmem[XFMEM_NORMALMATRICES+3*startn];
for (int i = startn; i < endn; ++i, pnstart += 3)
SetVSConstant4fv(C_NORMALMATRICES + i, pnstart); // looks like we're reading one too much..
nNormalMatricesChanged[0] = nNormalMatricesChanged[1] = -1;
}
@ -116,8 +113,7 @@ void VertexShaderManager::SetConstants()
int startn = nPostTransformMatricesChanged[0] / 4;
int endn = (nPostTransformMatricesChanged[1] + 3 ) / 4;
const float* pstart = (const float*)&xfmem[XFMEM_POSTMATRICES + startn * 4];
for(int i = startn; i < endn; ++i, pstart += 4)
SetVSConstant4fv(C_POSTTRANSFORMMATRICES + i, pstart);
SetMultiVSConstant4fv(C_POSTTRANSFORMMATRICES + startn, endn - startn, pstart);
}
if (nLightsChanged[0] >= 0)
@ -168,47 +164,41 @@ void VertexShaderManager::SetConstants()
{
bPosNormalMatrixChanged = false;
float* pos = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
float* norm = (float*)xfmem + XFMEM_NORMALMATRICES + 3 * (MatrixIndexA.PosNormalMtxIdx & 31);
const float *pos = (const float *)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *norm = (const float *)xfmem + XFMEM_NORMALMATRICES + 3 * (MatrixIndexA.PosNormalMtxIdx & 31);
SetVSConstant4fv(C_POSNORMALMATRIX, pos);
SetVSConstant4fv(C_POSNORMALMATRIX+1, pos + 4);
SetVSConstant4fv(C_POSNORMALMATRIX+2, pos + 8);
SetMultiVSConstant4fv(C_POSNORMALMATRIX, 3, pos);
SetVSConstant4fv(C_POSNORMALMATRIX+3, norm);
SetVSConstant4fv(C_POSNORMALMATRIX+4, norm + 3);
SetVSConstant4fv(C_POSNORMALMATRIX+5, norm + 6);
}
}
if (bTexMatricesChanged[0])
{
bTexMatricesChanged[0] = false;
float* fptrs[] =
const float *fptrs[] =
{
(float*)xfmem + MatrixIndexA.Tex0MtxIdx * 4, (float*)xfmem + MatrixIndexA.Tex1MtxIdx * 4,
(float*)xfmem + MatrixIndexA.Tex2MtxIdx * 4, (float*)xfmem + MatrixIndexA.Tex3MtxIdx * 4
(const float *)xfmem + MatrixIndexA.Tex0MtxIdx * 4, (const float *)xfmem + MatrixIndexA.Tex1MtxIdx * 4,
(const float *)xfmem + MatrixIndexA.Tex2MtxIdx * 4, (const float *)xfmem + MatrixIndexA.Tex3MtxIdx * 4
};
for (int i = 0; i < 4; ++i)
{
SetVSConstant4fv(C_TEXMATRICES+3 * i, fptrs[i]);
SetVSConstant4fv(C_TEXMATRICES+3 * i + 1, fptrs[i] + 4);
SetVSConstant4fv(C_TEXMATRICES+3 * i + 2, fptrs[i] + 8);
SetMultiVSConstant4fv(C_TEXMATRICES + 3 * i, 3, fptrs[i]);
}
}
if (bTexMatricesChanged[1])
{
bTexMatricesChanged[1] = false;
float* fptrs[] = {(float*)xfmem + MatrixIndexB.Tex4MtxIdx * 4, (float*)xfmem + MatrixIndexB.Tex5MtxIdx * 4,
(float*)xfmem + MatrixIndexB.Tex6MtxIdx * 4, (float*)xfmem + MatrixIndexB.Tex7MtxIdx * 4 };
const float *fptrs[] = {
(const float *)xfmem + MatrixIndexB.Tex4MtxIdx * 4, (const float *)xfmem + MatrixIndexB.Tex5MtxIdx * 4,
(const float *)xfmem + MatrixIndexB.Tex6MtxIdx * 4, (const float *)xfmem + MatrixIndexB.Tex7MtxIdx * 4
};
for (int i = 0; i < 4; ++i)
{
SetVSConstant4fv(C_TEXMATRICES+3 * i + 12, fptrs[i]);
SetVSConstant4fv(C_TEXMATRICES+3 * i + 12 + 1, fptrs[i] + 4);
SetVSConstant4fv(C_TEXMATRICES+3 * i + 12 + 2, fptrs[i] + 8);
SetMultiVSConstant4fv(C_TEXMATRICES+3 * i + 12, 3, fptrs[i]);
}
}
@ -335,17 +325,11 @@ void VertexShaderManager::SetConstants()
Matrix44::Set(mtxB, g_fProjectionMatrix);
Matrix44::Multiply(mtxB, viewMtx, mtxA); // mtxA = projection x view
SetVSConstant4fv(C_PROJECTION, &mtxA.data[0]);
SetVSConstant4fv(C_PROJECTION+1, &mtxA.data[4]);
SetVSConstant4fv(C_PROJECTION+2, &mtxA.data[8]);
SetVSConstant4fv(C_PROJECTION+3, &mtxA.data[12]);
SetMultiVSConstant4fv(C_PROJECTION, 4, &mtxA.data[0]);
}
else
{
SetVSConstant4fv(C_PROJECTION, &g_fProjectionMatrix[0]);
SetVSConstant4fv(C_PROJECTION+1, &g_fProjectionMatrix[4]);
SetVSConstant4fv(C_PROJECTION+2, &g_fProjectionMatrix[8]);
SetVSConstant4fv(C_PROJECTION+3, &g_fProjectionMatrix[12]);
SetMultiVSConstant4fv(C_PROJECTION, 4, &g_fProjectionMatrix[0]);
}
}
}

View File

@ -45,5 +45,6 @@ public:
void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4);
void SetVSConstant4fv(int const_number, const float *f);
void SetMultiVSConstant4fv(int const_number, int count, const float *f);
#endif // _VERTEXSHADERMANAGER_H

View File

@ -37,7 +37,7 @@ static float lastVSconstants[C_FOGPARAMS+8][4];
void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
{
if( lastVSconstants[const_number][0] != f1 ||
if (lastVSconstants[const_number][0] != f1 ||
lastVSconstants[const_number][1] != f2 ||
lastVSconstants[const_number][2] != f3 ||
lastVSconstants[const_number][3] != f4)
@ -53,7 +53,7 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
void SetVSConstant4fv(int const_number, const float *f)
{
if( lastVSconstants[const_number][0] != f[0] ||
if (lastVSconstants[const_number][0] != f[0] ||
lastVSconstants[const_number][1] != f[1] ||
lastVSconstants[const_number][2] != f[2] ||
lastVSconstants[const_number][3] != f[3])
@ -66,6 +66,32 @@ void SetVSConstant4fv(int const_number, const float *f)
}
}
// Sets `count` consecutive 4-float vertex shader constants, starting at
// register `const_number`, from the packed array `f` (4 floats per register).
// The incoming data is compared against the lastVSconstants cache; if any
// register differs, the whole range is sent to the device with a single
// SetVertexShaderConstantF call and the cache is refreshed for the full range.
// If nothing differs (or count <= 0), nothing is sent.
void SetMultiVSConstant4fv(int const_number, int count, const float *f)
{
	// Walk forward until the first register whose cached value is stale.
	int reg = 0;
	while (reg < count)
	{
		const float *src = f + reg * 4;
		const float *cached = lastVSconstants[const_number + reg];
		if (cached[0] != src[0] ||
			cached[1] != src[1] ||
			cached[2] != src[2] ||
			cached[3] != src[3])
		{
			break;
		}
		++reg;
	}

	// Ran off the end without finding a difference: the cache is up to date.
	if (reg >= count)
		return;

	// One batched upload for the whole range, then mirror it into the cache.
	D3D::dev->SetVertexShaderConstantF(const_number, f, count);
	for (int r = 0; r < count; ++r)
	{
		for (int c = 0; c < 4; ++c)
			lastVSconstants[const_number + r][c] = f[r * 4 + c];
	}
}
void VertexShaderCache::Init()
{
//memset(lastVSconstants,0xFF,(C_FOGPARAMS+8)*4*sizeof(float)); // why does this not work

View File

@ -93,8 +93,6 @@ wxWindow* GetParentedWxWindow(HWND Parent)
#if defined(HAVE_WX) && HAVE_WX
void DllDebugger(HWND _hParent, bool Show)
{
//SetWindowTextA(EmuWindow::GetWnd(), "Hello");
if (!m_DebuggerFrame)
m_DebuggerFrame = new GFXDebuggerDX9(GetParentedWxWindow(_hParent));
@ -107,8 +105,6 @@ void DllDebugger(HWND _hParent, bool Show)
void DllDebugger(HWND _hParent, bool Show) { }
#endif
#if defined(HAVE_WX) && HAVE_WX
class wxDLLApp : public wxApp
{
@ -121,11 +117,7 @@ void DllDebugger(HWND _hParent, bool Show) { }
WXDLLIMPEXP_BASE void wxSetInstance(HINSTANCE hInst);
#endif
BOOL APIENTRY DllMain( HINSTANCE hinstDLL, // DLL module handle
DWORD dwReason, // reason called
LPVOID lpvReserved) // reserved
BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD dwReason, LPVOID lpvReserved)
{
switch (dwReason)
{
@ -257,16 +249,20 @@ void DllAbout(HWND _hParent)
void DllConfig(HWND _hParent)
{
if (Init())
// If not initialized, only init D3D so we can enumerate resolutions.
if (initCount == 0)
{
DlgSettings_Show(g_hInstance,_hParent);
DeInit();
D3D::Init();
}
DlgSettings_Show(g_hInstance, _hParent);
if (initCount == 0)
{
D3D::Shutdown();
}
}
void Initialize(void *init)
{
SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
frameCount = 0;
g_VideoInitialize = *_pVideoInitialize;
@ -275,38 +271,13 @@ void Initialize(void *init)
_pVideoInitialize->pUpdateFPSDisplay = g_VideoInitialize.pUpdateFPSDisplay;
_pVideoInitialize->pWindowHandle = g_VideoInitialize.pWindowHandle;
OSD::AddMessage("Dolphin Direct3D9 Video Plugin.",5000);
}
void DoState(unsigned char **ptr, int mode) {
// Clear all caches
TextureCache::Invalidate(false);
PointerWrap p(ptr, mode);
VideoCommon_DoState(p);
//PanicAlert("Saving/Loading state from DirectX9");
}
void Video_EnterLoop()
{
Fifo_EnterLoop(g_VideoInitialize);
}
void Video_ExitLoop()
{
Fifo_ExitLoop();
}
void Video_SetRendering(bool bEnabled) {
Fifo_SetRendering(bEnabled);
OSD::AddMessage("Dolphin Direct3D9 Video Plugin.", 5000);
}
void Video_Prepare(void)
{
Renderer::Init();
TextureCache::Init();
BPInit();
VertexManager::Init();
Fifo_Init();
@ -333,6 +304,28 @@ void Shutdown(void)
DeInit();
}
// Invalidates the texture cache, then wraps ptr/mode in a PointerWrap and
// passes it to VideoCommon_DoState.
// NOTE(review): presumably this is the plugin's save-state entry point, with
// `mode` selecting the direction (save vs. load) - confirm against PointerWrap.
void DoState(unsigned char **ptr, int mode) {
// Clear texture cache because it might have written to RAM
TextureCache::Invalidate(false);
// The invalidation above runs before the state is handed to the common code.
PointerWrap p(ptr, mode);
VideoCommon_DoState(p);
}
// Thin wrapper: forwards to Fifo_EnterLoop, passing the global
// g_VideoInitialize structure.
void Video_EnterLoop()
{
Fifo_EnterLoop(g_VideoInitialize);
}
// Thin wrapper: forwards to Fifo_ExitLoop.
void Video_ExitLoop()
{
Fifo_ExitLoop();
}
// Thin wrapper: forwards the bEnabled flag unchanged to Fifo_SetRendering.
void Video_SetRendering(bool bEnabled) {
Fifo_SetRendering(bEnabled);
}
void Video_SendFifoData(u8* _uData, u32 len)
{
Fifo_SendFifoData(_uData, len);

View File

@ -47,8 +47,8 @@ static float lastPSconstants[C_COLORMATRIX+16][4];
void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
{
if( lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4 )
if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4)
{
const float f[4] = {f1, f2, f3, f4};
glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f1, f2, f3, f4);
@ -61,8 +61,8 @@ void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
void SetPSConstant4fv(int const_number, const float *f)
{
if( lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] ||
lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3] )
if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] ||
lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3])
{
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f);
lastPSconstants[const_number][0] = f[0];

View File

@ -62,7 +62,7 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
void SetVSConstant4fv(int const_number, const float *f)
{
if( lastVSconstants[const_number][0] != f[0] ||
if (lastVSconstants[const_number][0] != f[0] ||
lastVSconstants[const_number][1] != f[1] ||
lastVSconstants[const_number][2] != f[2] ||
lastVSconstants[const_number][3] != f[3])
@ -75,6 +75,24 @@ void SetVSConstant4fv(int const_number, const float *f)
}
}
// Sets `count` consecutive 4-float vertex program constants, starting at
// index `const_number`, from the packed array `f` (4 floats per constant).
// Each constant is handled independently: it is uploaded with
// glProgramEnvParameter4fvARB and written to the lastVSconstants cache only
// when it differs from the cached value.
void SetMultiVSConstant4fv(int const_number, int count, const float *f)
{
	for (int reg = 0; reg < count; ++reg)
	{
		const int slot = const_number + reg;
		const float *src = f + reg * 4;

		const bool unchanged =
			lastVSconstants[slot][0] == src[0] &&
			lastVSconstants[slot][1] == src[1] &&
			lastVSconstants[slot][2] == src[2] &&
			lastVSconstants[slot][3] == src[3];
		if (unchanged)
			continue;

		// Upload just this constant, then remember what was sent.
		glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, slot, src);
		for (int c = 0; c < 4; ++c)
			lastVSconstants[slot][c] = src[c];
	}
}
void VertexShaderCache::Init()
{
for( int i=0;i<(C_FOGPARAMS+8)*4;i++)