Merge branch 'gx-optimization'

This branch reduces the number of useless state flushes in the video
emulation layer by checking whether a BP/XF write would actually have an
effect before flushing. This greatly reduces the number of GL calls per frame.

Thanks to degasus for his help!
This commit is contained in:
Pierre Bourdon 2012-05-24 21:49:37 +02:00
commit cf69e7ca8a
2 changed files with 90 additions and 26 deletions

View File

@ -128,7 +128,27 @@ void BPWritten(const BPCmd& bp)
FlushPipeline(); FlushPipeline();
} }
} // END ZTP SPEEDUP HACK } // END ZTP SPEEDUP HACK
else FlushPipeline(); else
{
if (((s32*)&bpmem)[bp.address] == bp.newvalue)
{
if (!(bp.address == BPMEM_TRIGGER_EFB_COPY
|| bp.address == BPMEM_CLEARBBOX1
|| bp.address == BPMEM_CLEARBBOX2
|| bp.address == BPMEM_SETDRAWDONE
|| bp.address == BPMEM_PE_TOKEN_ID
|| bp.address == BPMEM_PE_TOKEN_INT_ID
|| bp.address == BPMEM_LOADTLUT0
|| bp.address == BPMEM_LOADTLUT1
|| bp.address == BPMEM_TEXINVALIDATE
|| bp.address == BPMEM_PRELOAD_MODE))
{
return;
}
}
FlushPipeline();
}
((u32*)&bpmem)[bp.address] = bp.newvalue; ((u32*)&bpmem)[bp.address] = bp.newvalue;

View File

@ -121,12 +121,18 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETVIEWPORT+3: case XFMEM_SETVIEWPORT+3:
case XFMEM_SETVIEWPORT+4: case XFMEM_SETVIEWPORT+4:
case XFMEM_SETVIEWPORT+5: case XFMEM_SETVIEWPORT+5:
{
u8 size = std::min(transferSize * 4, 6 * 4);
if (memcmp((u32*)&xfregs + (address - 0x1000), pData + dataIndex, size))
{
VertexManager::Flush(); VertexManager::Flush();
VertexShaderManager::SetViewportChanged(); VertexShaderManager::SetViewportChanged();
PixelShaderManager::SetViewportChanged(); PixelShaderManager::SetViewportChanged();
}
nextAddress = XFMEM_SETVIEWPORT + 6; nextAddress = XFMEM_SETVIEWPORT + 6;
break; break;
}
case XFMEM_SETPROJECTION: case XFMEM_SETPROJECTION:
case XFMEM_SETPROJECTION+1: case XFMEM_SETPROJECTION+1:
@ -135,11 +141,17 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETPROJECTION+4: case XFMEM_SETPROJECTION+4:
case XFMEM_SETPROJECTION+5: case XFMEM_SETPROJECTION+5:
case XFMEM_SETPROJECTION+6: case XFMEM_SETPROJECTION+6:
{
u8 size = std::min(transferSize * 4, 7 * 4);
if (memcmp((u32*)&xfregs + (address - 0x1000), pData + dataIndex, size))
{
VertexManager::Flush(); VertexManager::Flush();
VertexShaderManager::SetProjectionChanged(); VertexShaderManager::SetProjectionChanged();
}
nextAddress = XFMEM_SETPROJECTION + 7; nextAddress = XFMEM_SETPROJECTION + 7;
break; break;
}
case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens
if (xfregs.numTexGen.numTexGens != (newValue & 15)) if (xfregs.numTexGen.numTexGens != (newValue & 15))
@ -154,10 +166,16 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETTEXMTXINFO+5: case XFMEM_SETTEXMTXINFO+5:
case XFMEM_SETTEXMTXINFO+6: case XFMEM_SETTEXMTXINFO+6:
case XFMEM_SETTEXMTXINFO+7: case XFMEM_SETTEXMTXINFO+7:
{
u8 size = std::min(transferSize * 4, 8 * 4);
if (memcmp((u32*)&xfregs + (address - 0x1000), pData + dataIndex, size))
{
VertexManager::Flush(); VertexManager::Flush();
}
nextAddress = XFMEM_SETTEXMTXINFO + 8; nextAddress = XFMEM_SETTEXMTXINFO + 8;
break; break;
}
case XFMEM_SETPOSMTXINFO: case XFMEM_SETPOSMTXINFO:
case XFMEM_SETPOSMTXINFO+1: case XFMEM_SETPOSMTXINFO+1:
@ -167,10 +185,16 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETPOSMTXINFO+5: case XFMEM_SETPOSMTXINFO+5:
case XFMEM_SETPOSMTXINFO+6: case XFMEM_SETPOSMTXINFO+6:
case XFMEM_SETPOSMTXINFO+7: case XFMEM_SETPOSMTXINFO+7:
{
u8 size = std::min(transferSize * 4, 8 * 4);
if (memcmp((u32*)&xfregs + (address - 0x1000), pData + dataIndex, size))
{
VertexManager::Flush(); VertexManager::Flush();
}
nextAddress = XFMEM_SETPOSMTXINFO + 8; nextAddress = XFMEM_SETPOSMTXINFO + 8;
break; break;
}
// -------------- // --------------
// Unknown Regs // Unknown Regs
@ -240,8 +264,15 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData)
transferSize = 0; transferSize = 0;
} }
for (u32 i = 0; i < xfMemTransferSize; ++i)
{
if (((u32*)&xfmem[xfMemBase])[i] != pData[i])
{
XFMemWritten(xfMemTransferSize, xfMemBase); XFMemWritten(xfMemTransferSize, xfMemBase);
memcpy_gc(&xfmem[xfMemBase], pData, xfMemTransferSize * 4); memcpy_gc(&xfmem[xfMemBase], pData, xfMemTransferSize * 4);
break;
}
}
pData += xfMemTransferSize; pData += xfMemTransferSize;
} }
@ -262,8 +293,21 @@ void LoadIndexedXF(u32 val, int refarray)
int size = ((val >> 12) & 0xF) + 1; int size = ((val >> 12) & 0xF) + 1;
//load stuff from array to address in xf mem //load stuff from array to address in xf mem
u32* currData = (u32*)(xfmem + address);
u32* newData = (u32*)Memory::GetPointer(arraybases[refarray] + arraystrides[refarray] * index);
bool changed = false;
for (int i = 0; i < size; ++i)
{
if (currData[i] != Common::swap32(newData[i]))
{
changed = true;
XFMemWritten(size, address); XFMemWritten(size, address);
break;
for (int i = 0; i < size; i++) }
xfmem[address + i] = Memory::Read_U32(arraybases[refarray] + arraystrides[refarray] * index + i * 4); }
if (changed)
{
for (int i = 0; i < size; ++i)
currData[i] = Common::swap32(newData[i]);
}
} }