Make frameskipping more aggressive (minor speedup; please report any serious problems). Add an initial display list cache implementation, disabled for now. Various cleanup.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3952 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
95b39da7ca
commit
6003c9ecd2
|
@ -20,17 +20,22 @@
|
||||||
|
|
||||||
extern u8* g_pVideoData;
|
extern u8* g_pVideoData;
|
||||||
|
|
||||||
inline u8 DataPeek8(u32 _uOffset)
|
inline void DataSkip(u32 skip)
|
||||||
|
{
|
||||||
|
g_pVideoData += skip;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline u8 DataPeek8(int _uOffset)
|
||||||
{
|
{
|
||||||
return g_pVideoData[_uOffset];
|
return g_pVideoData[_uOffset];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u16 DataPeek16(u32 _uOffset)
|
inline u16 DataPeek16(int _uOffset)
|
||||||
{
|
{
|
||||||
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]);
|
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u32 DataPeek32(u32 _uOffset)
|
inline u32 DataPeek32(int _uOffset)
|
||||||
{
|
{
|
||||||
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]);
|
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]);
|
||||||
}
|
}
|
||||||
|
@ -118,9 +123,4 @@ inline u8* DataGetPosition()
|
||||||
return g_pVideoData;
|
return g_pVideoData;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void DataSkip(u32 skip)
|
|
||||||
{
|
|
||||||
g_pVideoData += skip;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -97,7 +97,8 @@ void Fifo_ExitLoop()
|
||||||
|
|
||||||
// May be executed from any thread, even the graphics thread.
|
// May be executed from any thread, even the graphics thread.
|
||||||
// Created to allow for self shutdown.
|
// Created to allow for self shutdown.
|
||||||
void Fifo_ExitLoopNonBlocking() {
|
void Fifo_ExitLoopNonBlocking()
|
||||||
|
{
|
||||||
fifoStateRun = false;
|
fifoStateRun = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,7 +119,7 @@ void Fifo_SendFifoData(u8* _uData, u32 len)
|
||||||
// Copy new video instructions to videoBuffer for future use in rendering the new picture
|
// Copy new video instructions to videoBuffer for future use in rendering the new picture
|
||||||
memcpy(videoBuffer + size, _uData, len);
|
memcpy(videoBuffer + size, _uData, len);
|
||||||
size += len;
|
size += len;
|
||||||
OpcodeDecoder_Run();
|
OpcodeDecoder_Run(g_bSkipCurrentFrame);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Description: Main FIFO update loop
|
// Description: Main FIFO update loop
|
||||||
|
|
|
@ -19,10 +19,11 @@
|
||||||
// Ikaruga uses (nearly) NO display lists!
|
// Ikaruga uses (nearly) NO display lists!
|
||||||
// Zelda WW uses TONS of display lists
|
// Zelda WW uses TONS of display lists
|
||||||
// Zelda TP uses almost 100% display lists except menus (we like this!)
|
// Zelda TP uses almost 100% display lists except menus (we like this!)
|
||||||
|
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
|
||||||
|
|
||||||
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they are
|
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they are
|
||||||
// and hope that the vertex format doesn't change, though, if you do it just when they are
|
// while interpreting them, and hope that the vertex format doesn't change, though, if you do it right
|
||||||
// called. The reason is that the vertex format affects the sizes of the vertices.
|
// when they are called. The reason is that the vertex format affects the sizes of the vertices.
|
||||||
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
#include "VideoCommon.h"
|
#include "VideoCommon.h"
|
||||||
|
@ -47,10 +48,9 @@ extern u8* FAKE_GetFifoEndPtr();
|
||||||
|
|
||||||
static void Decode();
|
static void Decode();
|
||||||
|
|
||||||
static void ExecuteDisplayList(u32 address, u32 size)
|
void InterpretDisplayList(u32 address, u32 size)
|
||||||
{
|
{
|
||||||
u8* old_pVideoData = g_pVideoData;
|
u8* old_pVideoData = g_pVideoData;
|
||||||
|
|
||||||
u8* startAddress = Memory_GetPtr(address);
|
u8* startAddress = Memory_GetPtr(address);
|
||||||
|
|
||||||
// Avoid the crash if Memory_GetPtr failed ..
|
// Avoid the crash if Memory_GetPtr failed ..
|
||||||
|
@ -61,7 +61,8 @@ static void ExecuteDisplayList(u32 address, u32 size)
|
||||||
// temporarily swap dl and non-dl (small "hack" for the stats)
|
// temporarily swap dl and non-dl (small "hack" for the stats)
|
||||||
Statistics::SwapDL();
|
Statistics::SwapDL();
|
||||||
|
|
||||||
while ((u32)(g_pVideoData - startAddress) < size)
|
u8 *end = g_pVideoData + size;
|
||||||
|
while (g_pVideoData < end)
|
||||||
{
|
{
|
||||||
Decode();
|
Decode();
|
||||||
}
|
}
|
||||||
|
@ -76,48 +77,60 @@ static void ExecuteDisplayList(u32 address, u32 size)
|
||||||
g_pVideoData = old_pVideoData;
|
g_pVideoData = old_pVideoData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Defer to plugin-specific DL cache.
|
||||||
|
extern bool HandleDisplayList(u32 address, u32 size);
|
||||||
|
|
||||||
|
void ExecuteDisplayList(u32 address, u32 size)
|
||||||
|
{
|
||||||
|
if (!HandleDisplayList(address, size))
|
||||||
|
InterpretDisplayList(address, size);
|
||||||
|
}
|
||||||
|
|
||||||
bool FifoCommandRunnable()
|
bool FifoCommandRunnable()
|
||||||
{
|
{
|
||||||
u32 iBufferSize = (u32)(FAKE_GetFifoEndPtr() - g_pVideoData);
|
u32 buffer_size = (u32)(FAKE_GetFifoEndPtr() - g_pVideoData);
|
||||||
if (iBufferSize == 0)
|
if (buffer_size == 0)
|
||||||
return false; // can't peek
|
return false; // can't peek
|
||||||
|
|
||||||
u8 Cmd = DataPeek8(0);
|
u8 cmd_byte = DataPeek8(0);
|
||||||
u32 iCommandSize = 0;
|
u32 command_size = 0;
|
||||||
|
|
||||||
switch (Cmd)
|
switch (cmd_byte)
|
||||||
{
|
{
|
||||||
case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly...
|
case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly...
|
||||||
case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters
|
||||||
case 0x44: // zelda 4 swords calls it and checks the metrics registers after that
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
||||||
iCommandSize = 1;
|
command_size = 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_LOAD_BP_REG:
|
||||||
|
command_size = 5;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_CP_REG:
|
case GX_LOAD_CP_REG:
|
||||||
iCommandSize = 6;
|
command_size = 6;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_INDX_A:
|
case GX_LOAD_INDX_A:
|
||||||
case GX_LOAD_INDX_B:
|
case GX_LOAD_INDX_B:
|
||||||
case GX_LOAD_INDX_C:
|
case GX_LOAD_INDX_C:
|
||||||
case GX_LOAD_INDX_D:
|
case GX_LOAD_INDX_D:
|
||||||
case GX_LOAD_BP_REG:
|
command_size = 5;
|
||||||
iCommandSize = 5;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_CMD_CALL_DL:
|
case GX_CMD_CALL_DL:
|
||||||
iCommandSize = 9;
|
command_size = 9;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_XF_REG:
|
case GX_LOAD_XF_REG:
|
||||||
{
|
{
|
||||||
// check if we can read the header
|
// check if we can read the header
|
||||||
if (iBufferSize >= 5)
|
if (buffer_size >= 5)
|
||||||
{
|
{
|
||||||
iCommandSize = 1 + 4;
|
command_size = 1 + 4;
|
||||||
u32 Cmd2 = DataPeek32(1);
|
u32 Cmd2 = DataPeek32(1);
|
||||||
int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||||
iCommandSize += dwTransferSize * 4;
|
command_size += transfer_size * 4;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -127,14 +140,14 @@ bool FifoCommandRunnable()
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (Cmd & 0x80)
|
if (cmd_byte & 0x80)
|
||||||
{
|
{
|
||||||
// check if we can read the header
|
// check if we can read the header
|
||||||
if (iBufferSize >= 3)
|
if (buffer_size >= 3)
|
||||||
{
|
{
|
||||||
iCommandSize = 1 + 2;
|
command_size = 1 + 2;
|
||||||
u16 numVertices = DataPeek16(1);
|
u16 numVertices = DataPeek16(1);
|
||||||
iCommandSize += numVertices * VertexLoaderManager::GetVertexSize(Cmd & GX_VAT_MASK);
|
command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -151,14 +164,14 @@ bool FifoCommandRunnable()
|
||||||
"* Command stream corrupted by some spurious memory bug\n"
|
"* Command stream corrupted by some spurious memory bug\n"
|
||||||
"* This really is an unknown opcode (unlikely)\n"
|
"* This really is an unknown opcode (unlikely)\n"
|
||||||
"* Some other sort of bug\n\n"
|
"* Some other sort of bug\n\n"
|
||||||
"Dolphin will now likely crash or hang. Enjoy." , Cmd);
|
"Dolphin will now likely crash or hang. Enjoy." , cmd_byte);
|
||||||
g_VideoInitialize.pSysMessage(szTemp);
|
g_VideoInitialize.pSysMessage(szTemp);
|
||||||
g_VideoInitialize.pLog(szTemp, TRUE);
|
g_VideoInitialize.pLog(szTemp, TRUE);
|
||||||
{
|
{
|
||||||
SCPFifoStruct &fifo = *g_VideoInitialize.pCPFifo;
|
SCPFifoStruct &fifo = *g_VideoInitialize.pCPFifo;
|
||||||
|
|
||||||
char szTmp[256];
|
char szTmp[256];
|
||||||
// sprintf(szTmp, "Illegal command %02x (at %08x)",Cmd,g_pDataReader->GetPtr());
|
// sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr());
|
||||||
sprintf(szTmp, "Illegal command %02x\n"
|
sprintf(szTmp, "Illegal command %02x\n"
|
||||||
"CPBase: 0x%08x\n"
|
"CPBase: 0x%08x\n"
|
||||||
"CPEnd: 0x%08x\n"
|
"CPEnd: 0x%08x\n"
|
||||||
|
@ -172,42 +185,39 @@ bool FifoCommandRunnable()
|
||||||
"bFF_BPEnable: %s\n"
|
"bFF_BPEnable: %s\n"
|
||||||
"bFF_GPLinkEnable: %s\n"
|
"bFF_GPLinkEnable: %s\n"
|
||||||
"bFF_Breakpoint: %s\n"
|
"bFF_Breakpoint: %s\n"
|
||||||
,Cmd, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance
|
,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance
|
||||||
,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false"
|
,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false"
|
||||||
,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_GPLinkEnable ? "true" : "false"
|
,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_GPLinkEnable ? "true" : "false"
|
||||||
,fifo.bFF_Breakpoint ? "true" : "false");
|
,fifo.bFF_Breakpoint ? "true" : "false");
|
||||||
|
|
||||||
g_VideoInitialize.pSysMessage(szTmp);
|
g_VideoInitialize.pSysMessage(szTmp);
|
||||||
g_VideoInitialize.pLog(szTmp, TRUE);
|
g_VideoInitialize.pLog(szTmp, TRUE);
|
||||||
// _assert_msg_(0,szTmp,"");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iCommandSize > iBufferSize)
|
if (command_size > buffer_size)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// INFO_LOG("OP detected: Cmd 0x%x size %i buffer %i",Cmd, iCommandSize, iBufferSize);
|
// INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Decode()
|
static void Decode()
|
||||||
{
|
{
|
||||||
int Cmd = DataReadU8();
|
int cmd_byte = DataReadU8();
|
||||||
|
switch (cmd_byte)
|
||||||
switch(Cmd)
|
|
||||||
{
|
{
|
||||||
case GX_NOP:
|
case GX_NOP:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_CP_REG: //0x08
|
case GX_LOAD_CP_REG: //0x08
|
||||||
{
|
{
|
||||||
u32 SubCmd = DataReadU8();
|
u8 sub_cmd = DataReadU8();
|
||||||
u32 Value = DataReadU32();
|
u32 value = DataReadU32();
|
||||||
LoadCPReg(SubCmd, Value);
|
LoadCPReg(sub_cmd, value);
|
||||||
INCSTAT(stats.thisFrame.numCPLoads);
|
INCSTAT(stats.thisFrame.numCPLoads);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -215,13 +225,13 @@ static void Decode()
|
||||||
case GX_LOAD_XF_REG:
|
case GX_LOAD_XF_REG:
|
||||||
{
|
{
|
||||||
u32 Cmd2 = DataReadU32();
|
u32 Cmd2 = DataReadU32();
|
||||||
int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||||
u32 dwAddress = Cmd2 & 0xFFFF;
|
u32 address = Cmd2 & 0xFFFF;
|
||||||
// TODO - speed this up. pshufb?
|
// TODO - speed this up. pshufb?
|
||||||
static u32 pData[16];
|
u32 data_buffer[16];
|
||||||
for (int i = 0; i < dwTransferSize; i++)
|
for (int i = 0; i < transfer_size; i++)
|
||||||
pData[i] = DataReadU32();
|
data_buffer[i] = DataReadU32();
|
||||||
LoadXFReg(dwTransferSize, dwAddress, pData);
|
LoadXFReg(transfer_size, address, data_buffer);
|
||||||
INCSTAT(stats.thisFrame.numXFLoads);
|
INCSTAT(stats.thisFrame.numXFLoads);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -241,13 +251,13 @@ static void Decode()
|
||||||
|
|
||||||
case GX_CMD_CALL_DL:
|
case GX_CMD_CALL_DL:
|
||||||
{
|
{
|
||||||
u32 dwAddr = DataReadU32();
|
u32 address = DataReadU32();
|
||||||
u32 dwCount = DataReadU32();
|
u32 count = DataReadU32();
|
||||||
ExecuteDisplayList(dwAddr, dwCount);
|
ExecuteDisplayList(address, count);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x44: // zelda 4 swords calls it and checks the metrics registers after that
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
||||||
DEBUG_LOG(VIDEO, "GX 0x44: %08x", Cmd);
|
DEBUG_LOG(VIDEO, "GX 0x44: %08x", Cmd);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -257,31 +267,107 @@ static void Decode()
|
||||||
|
|
||||||
case GX_LOAD_BP_REG: //0x61
|
case GX_LOAD_BP_REG: //0x61
|
||||||
{
|
{
|
||||||
u32 cmd = DataReadU32();
|
u32 bp_cmd = DataReadU32();
|
||||||
LoadBPReg(cmd);
|
LoadBPReg(bp_cmd);
|
||||||
INCSTAT(stats.thisFrame.numBPLoads);
|
INCSTAT(stats.thisFrame.numBPLoads);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// draw primitives
|
// draw primitives
|
||||||
default:
|
default:
|
||||||
if (Cmd & 0x80)
|
if (cmd_byte & 0x80)
|
||||||
{
|
{
|
||||||
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
||||||
u16 numVertices = DataReadU16();
|
u16 numVertices = DataReadU16();
|
||||||
|
|
||||||
VertexLoaderManager::RunVertices(
|
VertexLoaderManager::RunVertices(
|
||||||
Cmd & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||||
(Cmd & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||||
numVertices);
|
numVertices);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// char szTmp[256];
|
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
|
||||||
//sprintf(szTmp, "Illegal command %02x (at %08x)",Cmd,g_pDataReader->GetPtr());
|
break;
|
||||||
//g_VideoInitialize.pLog(szTmp);
|
}
|
||||||
//MessageBox(0,szTmp,"GFX ERROR",0);
|
break;
|
||||||
// _assert_msg_(0,szTmp,"");
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void DecodeSemiNop()
|
||||||
|
{
|
||||||
|
int cmd_byte = DataReadU8();
|
||||||
|
switch (cmd_byte)
|
||||||
|
{
|
||||||
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
||||||
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
||||||
|
case GX_NOP:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_LOAD_CP_REG: //0x08
|
||||||
|
// We have to let CP writes through because they determine the size of vertices.
|
||||||
|
{
|
||||||
|
u8 sub_cmd = DataReadU8();
|
||||||
|
u32 value = DataReadU32();
|
||||||
|
LoadCPReg(sub_cmd, value);
|
||||||
|
INCSTAT(stats.thisFrame.numCPLoads);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_LOAD_XF_REG:
|
||||||
|
{
|
||||||
|
u32 Cmd2 = DataReadU32();
|
||||||
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||||
|
u32 address = Cmd2 & 0xFFFF;
|
||||||
|
// TODO - speed this up. pshufb?
|
||||||
|
u32 data_buffer[16];
|
||||||
|
for (int i = 0; i < transfer_size; i++)
|
||||||
|
data_buffer[i] = DataReadU32();
|
||||||
|
LoadXFReg(transfer_size, address, data_buffer);
|
||||||
|
INCSTAT(stats.thisFrame.numXFLoads);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_LOAD_INDX_A: //used for position matrices
|
||||||
|
LoadIndexedXF(DataReadU32(), 0xC);
|
||||||
|
break;
|
||||||
|
case GX_LOAD_INDX_B: //used for normal matrices
|
||||||
|
LoadIndexedXF(DataReadU32(), 0xD);
|
||||||
|
break;
|
||||||
|
case GX_LOAD_INDX_C: //used for postmatrices
|
||||||
|
LoadIndexedXF(DataReadU32(), 0xE);
|
||||||
|
break;
|
||||||
|
case GX_LOAD_INDX_D: //used for lights
|
||||||
|
LoadIndexedXF(DataReadU32(), 0xF);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_CMD_CALL_DL:
|
||||||
|
// Hm, wonder if any games put tokens in display lists - in that case,
|
||||||
|
// we'll have to parse them too.
|
||||||
|
DataSkip(8);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GX_LOAD_BP_REG: //0x61
|
||||||
|
// We have to let BP writes through because they set tokens and stuff.
|
||||||
|
// TODO: Call a much simplified LoadBPReg instead.
|
||||||
|
{
|
||||||
|
u32 bp_cmd = DataReadU32();
|
||||||
|
LoadBPReg(bp_cmd);
|
||||||
|
INCSTAT(stats.thisFrame.numBPLoads);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
// draw primitives
|
||||||
|
default:
|
||||||
|
if (cmd_byte & 0x80)
|
||||||
|
{
|
||||||
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
||||||
|
u16 numVertices = DataReadU16();
|
||||||
|
DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -298,13 +384,17 @@ void OpcodeDecoder_Shutdown()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpcodeDecoder_Run()
|
void OpcodeDecoder_Run(bool skipped_frame)
|
||||||
{
|
{
|
||||||
DVSTARTPROFILE();
|
DVSTARTPROFILE();
|
||||||
while (FifoCommandRunnable())
|
if (!skipped_frame)
|
||||||
{
|
{
|
||||||
//TODO?: if really needed, do something like this: "InterlockedExchange((LONG*)&_fifo.CPCmdIdle, 0);"
|
while (FifoCommandRunnable())
|
||||||
Decode();
|
Decode();
|
||||||
}
|
}
|
||||||
//TODO?: if really needed, do something like this: "InterlockedExchange((LONG*)&_fifo.CPCmdIdle, 1);"
|
else
|
||||||
|
{
|
||||||
|
while (FifoCommandRunnable())
|
||||||
|
DecodeSemiNop();
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -29,6 +29,7 @@
|
||||||
#define GX_LOAD_INDX_D 0x38
|
#define GX_LOAD_INDX_D 0x38
|
||||||
|
|
||||||
#define GX_CMD_CALL_DL 0x40
|
#define GX_CMD_CALL_DL 0x40
|
||||||
|
#define GX_CMD_UNKNOWN_METRICS 0x44
|
||||||
#define GX_CMD_INVL_VC 0x48
|
#define GX_CMD_INVL_VC 0x48
|
||||||
|
|
||||||
#define GX_PRIMITIVE_MASK 0x78
|
#define GX_PRIMITIVE_MASK 0x78
|
||||||
|
@ -46,6 +47,6 @@
|
||||||
|
|
||||||
void OpcodeDecoder_Init();
|
void OpcodeDecoder_Init();
|
||||||
void OpcodeDecoder_Shutdown();
|
void OpcodeDecoder_Shutdown();
|
||||||
void OpcodeDecoder_Run();
|
void OpcodeDecoder_Run(bool skipped_frame);
|
||||||
|
|
||||||
#endif // _OPCODE_DECODING_H
|
#endif // _OPCODE_DECODING_H
|
||||||
|
|
|
@ -234,6 +234,7 @@ void PixelShaderManager::SetPSTextureDims(int texid)
|
||||||
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
|
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This one is high in profiles (0.5%)
|
||||||
void PixelShaderManager::SetColorChanged(int type, int num)
|
void PixelShaderManager::SetColorChanged(int type, int num)
|
||||||
{
|
{
|
||||||
int r = bpmem.tevregs[num].low.a;
|
int r = bpmem.tevregs[num].low.a;
|
||||||
|
@ -241,10 +242,10 @@ void PixelShaderManager::SetColorChanged(int type, int num)
|
||||||
int b = bpmem.tevregs[num].high.a;
|
int b = bpmem.tevregs[num].high.a;
|
||||||
int g = bpmem.tevregs[num].high.b;
|
int g = bpmem.tevregs[num].high.b;
|
||||||
float *pf = &lastRGBAfull[type][num][0];
|
float *pf = &lastRGBAfull[type][num][0];
|
||||||
pf[0] = (float)r / 255.0f;
|
pf[0] = (float)r * (1.0f / 255.0f);
|
||||||
pf[1] = (float)g / 255.0f;
|
pf[1] = (float)g * (1.0f / 255.0f);
|
||||||
pf[2] = (float)b / 255.0f;
|
pf[2] = (float)b * (1.0f / 255.0f);
|
||||||
pf[3] = (float)a / 255.0f;
|
pf[3] = (float)a * (1.0f / 255.0f);
|
||||||
s_nColorsChanged[type] |= 1 << num;
|
s_nColorsChanged[type] |= 1 << num;
|
||||||
PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, pf[0], pf[1], pf[2], pf[3]);
|
PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, pf[0], pf[1], pf[2], pf[3]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -289,6 +289,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short1()
|
||||||
}
|
}
|
||||||
void LOADERDECL TexCoord_ReadIndex16_Short2()
|
void LOADERDECL TexCoord_ReadIndex16_Short2()
|
||||||
{
|
{
|
||||||
|
// Heavy in ZWW
|
||||||
u16 Index = DataReadU16();
|
u16 Index = DataReadU16();
|
||||||
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
|
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
|
||||||
|
|
|
@ -18,33 +18,26 @@
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
#include "UCode_Zelda.h"
|
#include "UCode_Zelda.h"
|
||||||
|
|
||||||
void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *input, signed short *out, short *histp, short *hist2p, int type)
|
void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *src, signed short *out, short *histp, short *hist2p, int type)
|
||||||
{
|
{
|
||||||
short nibbles[16];
|
|
||||||
short hist = *histp;
|
|
||||||
short hist2 = *hist2p;
|
|
||||||
|
|
||||||
const char *src = input;
|
|
||||||
char *dst = (char*)out;
|
|
||||||
|
|
||||||
// First 2 nibbles are ADPCM scale etc.
|
// First 2 nibbles are ADPCM scale etc.
|
||||||
short delta = 1 << (((*src) >> 4) & 0xf);
|
short delta = 1 << (((*src) >> 4) & 0xf);
|
||||||
short idx = (*src) & 0xf;
|
short idx = (*src) & 0xf;
|
||||||
src++;
|
src++;
|
||||||
|
|
||||||
|
short nibbles[16];
|
||||||
if (type == 9)
|
if (type == 9)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 16; i = i + 2) {
|
for (int i = 0; i < 16; i += 2)
|
||||||
int j = (*src & 255) >> 4;
|
{
|
||||||
nibbles[i] = j;
|
nibbles[i + 0] = *src >> 4;
|
||||||
j = *src & 255 & 15;
|
nibbles[i + 1] = *src & 15;
|
||||||
nibbles[i+1] = j;
|
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
for (int i = 0; i < 16; i = i + 1) {
|
|
||||||
if (nibbles[i] >= 8)
|
if (nibbles[i] >= 8)
|
||||||
nibbles[i] = nibbles[i] - 16;
|
nibbles[i] = nibbles[i] - 16;
|
||||||
|
nibbles[i] <<= 11;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -52,45 +45,33 @@ void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *input, signed sh
|
||||||
// In Pikmin, Dolphin's engine sound is using AFC 5bits, even though such a sound is hard
|
// In Pikmin, Dolphin's engine sound is using AFC 5bits, even though such a sound is hard
|
||||||
// to compare, it seems like to sound exactly like a real GC
|
// to compare, it seems like to sound exactly like a real GC
|
||||||
DEBUG_LOG(DSPHLE, "5 bits AFC sample");
|
DEBUG_LOG(DSPHLE, "5 bits AFC sample");
|
||||||
|
|
||||||
for (int i = 0; i < 16; i += 4)
|
for (int i = 0; i < 16; i += 4)
|
||||||
{
|
{
|
||||||
int j = (*src >> 0) & 0x02;
|
nibbles[i + 0] = (*src >> 6) & 0x02;
|
||||||
nibbles[i] = j;
|
nibbles[i + 1] = (*src >> 4) & 0x02;
|
||||||
|
nibbles[i + 2] = (*src >> 2) & 0x02;
|
||||||
j = (*src >> 2) & 0x02;
|
nibbles[i + 3] = (*src >> 0) & 0x02;
|
||||||
nibbles[i+1] = j;
|
|
||||||
|
|
||||||
j = (*src >> 4) & 0x02;
|
|
||||||
nibbles[i+2] = j;
|
|
||||||
|
|
||||||
j = (*src >> 6) & 0x02;
|
|
||||||
nibbles[i+3] = j;
|
|
||||||
|
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 16; i++)
|
for (int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
if (nibbles[i] >= 2)
|
if (nibbles[i] >= 2)
|
||||||
nibbles[i] = nibbles[i] - 4;
|
nibbles[i] = nibbles[i] - 4;
|
||||||
|
nibbles[i] <<= 13;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
short hist = *histp;
|
||||||
|
short hist2 = *hist2p;
|
||||||
for (int i = 0; i < 16; i++)
|
for (int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
int sample = (delta * nibbles[i]) << 11;
|
int sample = delta * nibbles[i] + ((long)hist * coef[idx * 2]) + ((long)hist2 * coef[idx * 2 + 1]);
|
||||||
sample += ((long)hist * coef[idx * 2]) + ((long)hist2 * coef[idx * 2 + 1]);
|
sample >>= 11;
|
||||||
sample = sample >> 11;
|
if (sample > 32767)
|
||||||
|
|
||||||
if (sample > 32767) {
|
|
||||||
sample = 32767;
|
sample = 32767;
|
||||||
}
|
if (sample < -32768)
|
||||||
if (sample < -32768) {
|
|
||||||
sample = -32768;
|
sample = -32768;
|
||||||
}
|
out[i] = sample;
|
||||||
*(short*)dst = (short)sample;
|
|
||||||
dst = dst + 2;
|
|
||||||
hist2 = hist;
|
hist2 = hist;
|
||||||
hist = (short)sample;
|
hist = (short)sample;
|
||||||
}
|
}
|
||||||
|
|
|
@ -720,6 +720,14 @@
|
||||||
RelativePath=".\Src\BPFunctions.cpp"
|
RelativePath=".\Src\BPFunctions.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath=".\Src\DLCache.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath=".\Src\DLCache.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath=".\Src\NativeVertexFormat.cpp"
|
RelativePath=".\Src\NativeVertexFormat.cpp"
|
||||||
>
|
>
|
||||||
|
|
|
@ -0,0 +1,564 @@
|
||||||
|
// Copyright (C) 2003-2009 Dolphin Project.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, version 2.0.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License 2.0 for more details.
|
||||||
|
|
||||||
|
// A copy of the GPL 2.0 should have been included with the program.
|
||||||
|
// If not, see http://www.gnu.org/licenses/
|
||||||
|
|
||||||
|
// Official SVN repository and contact information can be found at
|
||||||
|
// http://code.google.com/p/dolphin-emu/
|
||||||
|
|
||||||
|
// TODO: Handle cache-is-full condition :p
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "Common.h"
|
||||||
|
#include "VideoCommon.h"
|
||||||
|
#include "Hash.h"
|
||||||
|
#include "MemoryUtil.h"
|
||||||
|
#include "DataReader.h"
|
||||||
|
#include "Statistics.h"
|
||||||
|
#include "OpcodeDecoding.h" // For the GX_ constants.
|
||||||
|
|
||||||
|
#include "XFMemory.h"
|
||||||
|
#include "CPMemory.h"
|
||||||
|
#include "BPMemory.h"
|
||||||
|
|
||||||
|
#include "VertexManager.h"
|
||||||
|
#include "VertexLoaderManager.h"
|
||||||
|
|
||||||
|
#include "x64Emitter.h"
|
||||||
|
#include "ABI.h"
|
||||||
|
|
||||||
|
#include "DLCache.h"
|
||||||
|
|
||||||
|
#define DL_CODE_CACHE_SIZE (1024*1024*16)
|
||||||
|
#define DL_STATIC_DATA_SIZE (1024*1024*4)
|
||||||
|
extern int frameCount;
|
||||||
|
|
||||||
|
using namespace Gen;
|
||||||
|
|
||||||
|
namespace DLCache
|
||||||
|
{
|
||||||
|
|
||||||
|
// Currently just recompiles the DLs themselves, doesn't bother with the vertex data.
|
||||||
|
// The speed boost is pretty small. The real big boost will come when we also store
|
||||||
|
// vertex arrays in the cached DLs.
|
||||||
|
|
||||||
|
// Processing stage of a cached display list. CachedDisplayList::pass holds one
// of these values and starts out as DLPASS_ANALYZE.
// NOTE(review): the stages presumably advance analyze -> compile -> run; the
// code that moves a list between stages is not visible here - confirm.
enum DisplayListPass
{
	DLPASS_ANALYZE = 0,
	DLPASS_COMPILE = 1,
	DLPASS_RUN     = 2
};
|
||||||
|
|
||||||
|
struct VDataHashRegion
|
||||||
|
{
|
||||||
|
u32 hash;
|
||||||
|
u32 start_address;
|
||||||
|
int size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CachedDisplayList
|
||||||
|
{
|
||||||
|
CachedDisplayList()
|
||||||
|
: uncachable(false),
|
||||||
|
pass(DLPASS_ANALYZE),
|
||||||
|
next_check(1)
|
||||||
|
{
|
||||||
|
frame_count = frameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int pass;
|
||||||
|
u32 dl_hash;
|
||||||
|
|
||||||
|
int check;
|
||||||
|
int next_check;
|
||||||
|
|
||||||
|
u32 vdata_hash;
|
||||||
|
|
||||||
|
std::vector<VDataHashRegion> hash_regions;
|
||||||
|
|
||||||
|
int frame_count;
|
||||||
|
|
||||||
|
bool uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
|
||||||
|
|
||||||
|
// ... Something containing cached vertex buffers here ...
|
||||||
|
|
||||||
|
// Compile the commands themselves down to native code.
|
||||||
|
const u8 *compiled_code;
|
||||||
|
};
|
||||||
|
|
||||||
|
// We want to allow caching DLs that start at the same address but have different lengths,
|
||||||
|
// so the size has to be in the ID.
|
||||||
|
inline u64 CreateMapId(u32 address, u32 size)
|
||||||
|
{
|
||||||
|
return ((u64)address << 32) | size;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef std::map<u64, CachedDisplayList> DLMap;
|
||||||
|
|
||||||
|
static DLMap dl_map;
|
||||||
|
static u8 *dlcode_cache;
|
||||||
|
static u8 *static_data_buffer;
|
||||||
|
static u8 *static_data_ptr;
|
||||||
|
|
||||||
|
static Gen::XEmitter emitter;
|
||||||
|
|
||||||
|
// Everything gets free'd when the cache is cleared.
|
||||||
|
u8 *AllocStaticData(int size)
|
||||||
|
{
|
||||||
|
u8 *cur_ptr = static_data_ptr;
|
||||||
|
static_data_ptr += (size + 3) & ~3;
|
||||||
|
return cur_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First pass - analyze
|
||||||
|
// Analysis pass: interprets the display list at `address` (`size` bytes) once,
// executing each command as it is decoded and counting the commands of each
// class along the way.
//
// g_pVideoData is pointed at the display list for the duration of the call and
// restored before returning. `dl` is not touched by this pass, and the counters
// are only gathered, not stored anywhere yet. Always returns true.
bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
{
	int num_xf_reg = 0;
	int num_cp_reg = 0;
	int num_bp_reg = 0;
	int num_index_xf = 0;
	int num_draw_call = 0;

	u8 *old_datareader = g_pVideoData;
	g_pVideoData = Memory_GetPtr(address);

	u8 *end = g_pVideoData + size;
	while (g_pVideoData < end)
	{
		// Yet another reimplementation of the DL reading...
		int cmd_byte = DataReadU8();
		switch (cmd_byte)
		{
		case GX_NOP:
			break;

		case GX_LOAD_CP_REG: //0x08
			{
				// Execute
				u8 sub_cmd = DataReadU8();
				u32 value = DataReadU32();
				LoadCPReg(sub_cmd, value);
				INCSTAT(stats.thisFrame.numCPLoads);

				// Analyze
				num_cp_reg++;
			}
			break;

		case GX_LOAD_XF_REG:
			{
				// Execute
				u32 Cmd2 = DataReadU32();
				// Bits 16..19 hold (count - 1), so at most 16 words follow.
				int transfer_size = ((Cmd2 >> 16) & 15) + 1;
				// Named xf_address so it does not shadow the `address` parameter.
				u32 xf_address = Cmd2 & 0xFFFF;
				// TODO - speed this up. pshufb?
				u32 data_buffer[16];
				for (int i = 0; i < transfer_size; i++)
					data_buffer[i] = DataReadU32();
				LoadXFReg(transfer_size, xf_address, data_buffer);
				INCSTAT(stats.thisFrame.numXFLoads);

				// Analyze
				num_xf_reg++;
			}
			break;

		case GX_LOAD_INDX_A: //used for position matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xC);
				// Analyze
				num_index_xf++;
			}
			break;

		case GX_LOAD_INDX_B: //used for normal matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xD);
				// Analyze
				num_index_xf++;
			}
			break;

		case GX_LOAD_INDX_C: //used for postmatrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xE);
				// Analyze
				num_index_xf++;
			}
			break;

		case GX_LOAD_INDX_D: //used for lights
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xF);
				// Analyze
				num_index_xf++;
			}
			break;

		case GX_CMD_CALL_DL:
			PanicAlert("Seeing DL call inside DL.");
			// The nested call is not executed, but its operands (u32 address
			// followed by u32 size) still have to be consumed; otherwise the
			// remainder of this list would be decoded out of sync.
			DataSkip(8);
			break;

		case GX_CMD_UNKNOWN_METRICS:
			// zelda 4 swords calls it and checks the metrics registers after that
			break;

		case GX_CMD_INVL_VC:// Invalidate (vertex cache?)
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
			break;

		case GX_LOAD_BP_REG: //0x61
			{
				u32 bp_cmd = DataReadU32();
				// Execute
				LoadBPReg(bp_cmd);
				INCSTAT(stats.thisFrame.numBPLoads);

				// Analyze
				num_bp_reg++;
			}
			break;

		// draw primitives
		default:
			if (cmd_byte & 0x80)
			{
				// load vertices (use computed vertex size from FifoCommandRunnable above)

				// Execute
				u16 numVertices = DataReadU16();

				VertexLoaderManager::RunVertices(
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);

				// Analyze
				num_draw_call++;
			}
			else
			{
				ERROR_LOG(VIDEO, "DLCache::AnalyzeAndRun: Illegal command %02x", cmd_byte);
			}
			break;
		}
	}

	g_pVideoData = old_datareader;
	return true;
}
|
||||||
|
|
||||||
|
// The only sensible way to detect changes to vertex data is to convert several times
|
||||||
|
// and hash the output.
|
||||||
|
|
||||||
|
// Second pass - compile
|
||||||
|
// Since some commands can affect the size of other commands, we really have no choice
|
||||||
|
// but to compile as we go, interpreting the list. We can't compile and then execute, we must
|
||||||
|
// compile AND execute at the same time. The second time the display list gets called, we already
|
||||||
|
// have the compiled code so we don't have to interpret anymore, we just run it.
|
||||||
|
// Compile pass: interprets the display list at `address` (`size` bytes) and,
// for every command it executes, also emits native code that repeats the same
// call with the same arguments. The entry point of the emitted routine is
// stored in dl->compiled_code.
//
// Pending vertices are flushed first. g_pVideoData is pointed at the display
// list for the duration of the call and restored before returning.
// Always returns true.
bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
{
	VertexManager::Flush();

	u8 *old_datareader = g_pVideoData;
	g_pVideoData = Memory_GetPtr(address);

	u8 *end = g_pVideoData + size;

	emitter.AlignCode4();
	dl->compiled_code = emitter.GetCodePtr();
	emitter.ABI_EmitPrologue(4);

	while (g_pVideoData < end)
	{
		// Yet another reimplementation of the DL reading...
		int cmd_byte = DataReadU8();
		switch (cmd_byte)
		{
		case GX_NOP:
			// Nothing to execute, nothing to compile.
			break;

		case GX_LOAD_CP_REG: //0x08
			{
				// Execute
				u8 sub_cmd = DataReadU8();
				u32 value = DataReadU32();
				LoadCPReg(sub_cmd, value);
				INCSTAT(stats.thisFrame.numCPLoads);

				// Compile
				emitter.ABI_CallFunctionCC(&LoadCPReg, sub_cmd, value);
			}
			break;

		case GX_LOAD_XF_REG:
			{
				// Execute
				u32 Cmd2 = DataReadU32();
				// Bits 16..19 hold (count - 1), so at most 16 words follow.
				int transfer_size = ((Cmd2 >> 16) & 15) + 1;
				// Named xf_address so it does not shadow the `address` parameter.
				u32 xf_address = Cmd2 & 0xFFFF;
				// TODO - speed this up. pshufb?
				// The words go into the static data area rather than onto the
				// stack because the emitted call below refers to them by pointer.
				u8 *real_data_buffer = AllocStaticData(4 * transfer_size);
				u32 *data_buffer = (u32 *)real_data_buffer;
				for (int i = 0; i < transfer_size; i++)
					data_buffer[i] = DataReadU32();
				LoadXFReg(transfer_size, xf_address, data_buffer);
				INCSTAT(stats.thisFrame.numXFLoads);

				// Compile
				emitter.ABI_CallFunctionCCP(&LoadXFReg, transfer_size, xf_address, data_buffer);
			}
			break;

		case GX_LOAD_INDX_A: //used for position matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xC);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xC);
			}
			break;

		case GX_LOAD_INDX_B: //used for normal matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xD);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xD);
			}
			break;

		case GX_LOAD_INDX_C: //used for postmatrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xE);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xE);
			}
			break;

		case GX_LOAD_INDX_D: //used for lights
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xF);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xF);
			}
			break;

		case GX_CMD_CALL_DL:
			PanicAlert("Seeing DL call inside DL.");
			// The nested call is neither executed nor compiled, but its
			// operands (u32 address followed by u32 size) still have to be
			// consumed; otherwise the remainder of this list would be decoded
			// out of sync.
			DataSkip(8);
			break;

		case GX_CMD_UNKNOWN_METRICS:
			// zelda 4 swords calls it and checks the metrics registers after that
			break;

		case GX_CMD_INVL_VC:// Invalidate (vertex cache?)
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
			break;

		case GX_LOAD_BP_REG: //0x61
			{
				u32 bp_cmd = DataReadU32();
				// Execute
				LoadBPReg(bp_cmd);
				INCSTAT(stats.thisFrame.numBPLoads);
				// Compile
				emitter.ABI_CallFunctionC(&LoadBPReg, bp_cmd);
			}
			break;

		// draw primitives
		default:
			if (cmd_byte & 0x80)
			{
				// load vertices (use computed vertex size from FifoCommandRunnable above)

				// Execute
				u16 numVertices = DataReadU16();

				// Remember where the vertex data starts so the compiled code
				// can restore g_pVideoData to this spot before its own call
				// (presumably RunVertices reads the vertices through
				// g_pVideoData - confirm against VertexLoaderManager).
				u64 pre_draw_video_data = (u64)g_pVideoData;

				VertexLoaderManager::RunVertices(
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);

				// Compile
#ifdef _M_X64
				emitter.MOV(64, R(RAX), Imm64(pre_draw_video_data));
				emitter.MOV(64, M(&g_pVideoData), R(RAX));
#else
				// Pointers are 32 bits wide here; make the narrowing explicit.
				emitter.MOV(32, R(EAX), Imm32((u32)pre_draw_video_data));
				emitter.MOV(32, M(&g_pVideoData), R(EAX));
#endif
				emitter.ABI_CallFunctionCCC(
					&VertexLoaderManager::RunVertices,
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);
			}
			else
			{
				ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte);
			}
			break;
		}
	}

	emitter.ABI_EmitEpilogue(4);

	g_pVideoData = old_datareader;
	return true;
}
|
||||||
|
|
||||||
|
// This one's pretty expensive. We should check if we can get away with only
|
||||||
|
// hashing the entire DL the first 3 frames or something.
|
||||||
|
// Hashes the display list stored at `address` with HashFletcher.
// The length handed to the hash is `size` with its lowest bit cleared, so an
// odd-sized list is hashed without its final byte.
u32 ComputeDLHash(u32 address, u32 size)
{
	const u32 even_size = size & ~1;
	return HashFletcher(Memory_GetPtr(address), even_size);
}
|
||||||
|
|
||||||
|
void Init()
|
||||||
|
{
|
||||||
|
dlcode_cache = (u8 *)AllocateExecutableMemory(DL_CODE_CACHE_SIZE, false); // Don't need low memory.
|
||||||
|
static_data_buffer = (u8 *)AllocateMemoryPages(DL_STATIC_DATA_SIZE);
|
||||||
|
static_data_ptr = static_data_buffer;
|
||||||
|
emitter.SetCodePtr(dlcode_cache);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Shutdown()
|
||||||
|
{
|
||||||
|
Clear();
|
||||||
|
FreeMemoryPages(dlcode_cache, DL_CODE_CACHE_SIZE);
|
||||||
|
FreeMemoryPages(static_data_buffer, DL_STATIC_DATA_SIZE);
|
||||||
|
dlcode_cache = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
dl_map.clear();
|
||||||
|
|
||||||
|
// Reset the cache pointers.
|
||||||
|
emitter.SetCodePtr(dlcode_cache);
|
||||||
|
static_data_ptr = static_data_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgressiveCleanup()
|
||||||
|
{
|
||||||
|
DLMap::iterator iter = dl_map.begin();
|
||||||
|
while (iter != dl_map.end()) {
|
||||||
|
CachedDisplayList &entry = iter->second;
|
||||||
|
int limit = iter->second.uncachable ? 1200 : 400;
|
||||||
|
if (entry.frame_count < frameCount - limit) {
|
||||||
|
// entry.Destroy();
|
||||||
|
#ifdef _WIN32
|
||||||
|
iter = dl_map.erase(iter);
|
||||||
|
#else
|
||||||
|
dl_map.erase(iter++); // (this is gcc standard!)
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
iter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// NOTE - outside the namespace on purpose.
|
||||||
|
// Entry point of the display list cache.
//
// Returns false when the caller should interpret the display list itself and
// true when the list has already been executed here. Caching is currently
// switched off, so the function returns false straight away and everything
// below the first return is unreachable.
//
// Intended flow once enabled:
//   - unknown list:     run the analysis pass and register the entry;
//   - DLPASS_COMPILE:   verify the hash, then compile and run the list;
//   - DLPASS_RUN:       re-verify the hash every so often (the interval
//                       doubles up to 1024 calls), then run the compiled code.
// A hash mismatch marks the entry uncachable and returns false.
bool HandleDisplayList(u32 address, u32 size)
{
	// Disable display list caching since the benefit isn't much to write home about
	// right now...
	return false;

	const u64 dl_id = DLCache::CreateMapId(address, size);
	DLCache::DLMap::iterator iter = DLCache::dl_map.find(dl_id);

	stats.numDListsAlive = DLCache::dl_map.size();

	if (iter == DLCache::dl_map.end())
	{
		// Never seen before: analyze it while running it, then remember it.
		DLCache::CachedDisplayList fresh;

		if (DLCache::AnalyzeAndRunDisplayList(address, size, &fresh))
		{
			fresh.dl_hash = HashAdler32(Memory_GetPtr(address), size);
			fresh.pass = DLCache::DLPASS_COMPILE;
			fresh.check = 1;
			fresh.next_check = 1;
		}
		else
		{
			fresh.uncachable = true;
		}

		DLCache::dl_map[dl_id] = fresh;
		return true;  // don't also interpret the list.
	}

	DLCache::CachedDisplayList &dl = iter->second;

	// We haven't compiled it - let's return false so it gets interpreted.
	if (dl.uncachable)
		return false;

	// Got one! Act according to the pass the entry has reached.
	if (dl.pass == DLCache::DLPASS_ANALYZE)
	{
		PanicAlert("DLPASS_ANALYZE - should have been done the first pass");
	}
	else if (dl.pass == DLCache::DLPASS_COMPILE)
	{
		// First, check that the hash is the same as the last time.
		if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size))
		{
			// PanicAlert("uncachable %08x", address);
			dl.uncachable = true;
			return false;
		}

		DLCache::CompileAndRunDisplayList(address, size, &dl);
		dl.pass = DLCache::DLPASS_RUN;
	}
	else if (dl.pass == DLCache::DLPASS_RUN)
	{
		// Every N draws, check hash
		dl.check--;
		if (dl.check <= 0)
		{
			if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size))
			{
				dl.uncachable = true;
				return false;
			}

			// Back off: double the interval between checks, capped at 1024.
			dl.check = dl.next_check;
			dl.next_check *= 2;
			if (dl.next_check > 1024)
				dl.next_check = 1024;
		}

		// The compiled routine overwrites g_pVideoData, so save and restore
		// it around the call.
		u8 *saved_reader = g_pVideoData;
		typedef void (*CompiledDLFunc)();
		CompiledDLFunc run_compiled = (CompiledDLFunc)(void*)(dl.compiled_code);
		run_compiled();
		g_pVideoData = saved_reader;
	}

	return true;
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
// Copyright (C) 2003-2009 Dolphin Project.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, version 2.0.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License 2.0 for more details.
|
||||||
|
|
||||||
|
// A copy of the GPL 2.0 should have been included with the program.
|
||||||
|
// If not, see http://www.gnu.org/licenses/
|
||||||
|
|
||||||
|
// Official SVN repository and contact information can be found at
|
||||||
|
// http://code.google.com/p/dolphin-emu/
|
||||||
|
|
||||||
|
#ifndef _DLCACHE_H
|
||||||
|
#define _DLCACHE_H
|
||||||
|
|
||||||
|
bool HandleDisplayList(u32 address, u32 size);
|
||||||
|
|
||||||
|
namespace DLCache {
|
||||||
|
|
||||||
|
void Init();
|
||||||
|
void Shutdown();
|
||||||
|
void ProgressiveCleanup();
|
||||||
|
void Clear();
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
#endif // _DLCACHE_H
|
|
@ -43,6 +43,7 @@
|
||||||
#include "TextureMngr.h"
|
#include "TextureMngr.h"
|
||||||
#include "rasterfont.h"
|
#include "rasterfont.h"
|
||||||
#include "VertexShaderGen.h"
|
#include "VertexShaderGen.h"
|
||||||
|
#include "DLCache.h"
|
||||||
#include "PixelShaderCache.h"
|
#include "PixelShaderCache.h"
|
||||||
#include "PixelShaderManager.h"
|
#include "PixelShaderManager.h"
|
||||||
#include "VertexShaderCache.h"
|
#include "VertexShaderCache.h"
|
||||||
|
@ -1102,6 +1103,7 @@ void Renderer::SwapBuffers()
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
||||||
// Clean out old stuff from caches
|
// Clean out old stuff from caches
|
||||||
|
DLCache::ProgressiveCleanup();
|
||||||
VertexShaderCache::ProgressiveCleanup();
|
VertexShaderCache::ProgressiveCleanup();
|
||||||
PixelShaderCache::ProgressiveCleanup();
|
PixelShaderCache::ProgressiveCleanup();
|
||||||
TextureMngr::ProgressiveCleanup();
|
TextureMngr::ProgressiveCleanup();
|
||||||
|
@ -1186,6 +1188,7 @@ void Renderer::DrawDebugText()
|
||||||
p+=sprintf(p,"vshaders alive: %i\n",stats.numVertexShadersAlive);
|
p+=sprintf(p,"vshaders alive: %i\n",stats.numVertexShadersAlive);
|
||||||
p+=sprintf(p,"dlists called: %i\n",stats.numDListsCalled);
|
p+=sprintf(p,"dlists called: %i\n",stats.numDListsCalled);
|
||||||
p+=sprintf(p,"dlists called(f): %i\n",stats.thisFrame.numDListsCalled);
|
p+=sprintf(p,"dlists called(f): %i\n",stats.thisFrame.numDListsCalled);
|
||||||
|
p+=sprintf(p,"dlists alive: %i\n",stats.numDListsAlive);
|
||||||
// not used.
|
// not used.
|
||||||
//p+=sprintf(p,"dlists created: %i\n",stats.numDListsCreated);
|
//p+=sprintf(p,"dlists created: %i\n",stats.numDListsCreated);
|
||||||
//p+=sprintf(p,"dlists alive: %i\n",stats.numDListsAlive);
|
//p+=sprintf(p,"dlists alive: %i\n",stats.numDListsAlive);
|
||||||
|
|
|
@ -187,10 +187,6 @@ void Flush()
|
||||||
|
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
||||||
if(g_bSkipCurrentFrame) {
|
|
||||||
ResetBuffer();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
|
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
|
||||||
glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
|
glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
|
||||||
|
@ -226,7 +222,7 @@ void Flush()
|
||||||
tex.texImage0[i&3].width + 1, tex.texImage0[i&3].height + 1,
|
tex.texImage0[i&3].width + 1, tex.texImage0[i&3].height + 1,
|
||||||
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texTlut[i&3].tlut_format);
|
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texTlut[i&3].tlut_format);
|
||||||
|
|
||||||
if (tentry != NULL)
|
if (tentry)
|
||||||
{
|
{
|
||||||
// texture loaded fine, set dims for pixel shader
|
// texture loaded fine, set dims for pixel shader
|
||||||
if (tentry->isRectangle)
|
if (tentry->isRectangle)
|
||||||
|
|
|
@ -91,6 +91,7 @@ GFXDebuggerOGL *m_DebuggerFrame = NULL;
|
||||||
#include "PostProcessing.h"
|
#include "PostProcessing.h"
|
||||||
#include "OnScreenDisplay.h"
|
#include "OnScreenDisplay.h"
|
||||||
#include "Setup.h"
|
#include "Setup.h"
|
||||||
|
#include "DLCache.h"
|
||||||
|
|
||||||
#include "VideoState.h"
|
#include "VideoState.h"
|
||||||
|
|
||||||
|
@ -385,7 +386,7 @@ void Video_Prepare(void)
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
VertexLoaderManager::Init();
|
VertexLoaderManager::Init();
|
||||||
TextureConverter::Init();
|
TextureConverter::Init();
|
||||||
|
DLCache::Init();
|
||||||
s_swapRequested = FALSE;
|
s_swapRequested = FALSE;
|
||||||
s_efbAccessRequested = FALSE;
|
s_efbAccessRequested = FALSE;
|
||||||
|
|
||||||
|
@ -400,6 +401,7 @@ void Shutdown(void)
|
||||||
s_efbAccessRequested = FALSE;
|
s_efbAccessRequested = FALSE;
|
||||||
s_swapRequested = FALSE;
|
s_swapRequested = FALSE;
|
||||||
|
|
||||||
|
DLCache::Shutdown();
|
||||||
Fifo_Shutdown();
|
Fifo_Shutdown();
|
||||||
PostProcessing::Shutdown();
|
PostProcessing::Shutdown();
|
||||||
|
|
||||||
|
@ -418,7 +420,6 @@ void Shutdown(void)
|
||||||
OpenGL_Shutdown();
|
OpenGL_Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Video_SendFifoData(u8* _uData, u32 len)
|
void Video_SendFifoData(u8* _uData, u32 len)
|
||||||
{
|
{
|
||||||
Fifo_SendFifoData(_uData, len);
|
Fifo_SendFifoData(_uData, len);
|
||||||
|
@ -435,8 +436,6 @@ void Video_ExitLoop()
|
||||||
Fifo_ExitLoop();
|
Fifo_ExitLoop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Screenshot and screen message
|
// Screenshot and screen message
|
||||||
|
|
||||||
void Video_Screenshot(const char *_szFilename)
|
void Video_Screenshot(const char *_szFilename)
|
||||||
|
|
Loading…
Reference in New Issue