Merge pull request #924 from comex/fifo-command-runnable

Refactor opcode decoding a bit to kill FifoCommandRunnable.
This commit is contained in:
comex 2014-09-02 23:27:30 -04:00
commit dd5be7c0dc
5 changed files with 123 additions and 270 deletions

View File

@@ -173,7 +173,7 @@ void RunGpuLoop()
ReadDataFromFifo(uData, 32); ReadDataFromFifo(uData, 32);
cyclesExecuted = OpcodeDecoder_Run(g_bSkipCurrentFrame); cyclesExecuted = OpcodeDecoder_Run(g_bSkipCurrentFrame, GetVideoBufferEndPtr());
if (Core::g_CoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) > cyclesExecuted) if (Core::g_CoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) > cyclesExecuted)
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted); Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
@@ -235,7 +235,7 @@ void RunGpu()
FPURoundMode::SaveSIMDState(); FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState(); FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32); ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(g_bSkipCurrentFrame); OpcodeDecoder_Run(g_bSkipCurrentFrame, GetVideoBufferEndPtr());
FPURoundMode::LoadSIMDState(); FPURoundMode::LoadSIMDState();
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

View File

@@ -75,13 +75,13 @@ static DataReadU32xNfunc DataReadU32xFuncs[16] = {
DataReadU32xN<16> DataReadU32xN<16>
}; };
static void Decode(); static u32 InterpretDisplayList(u32 address, u32 size)
void InterpretDisplayList(u32 address, u32 size)
{ {
u8* old_pVideoData = g_pVideoData; u8* old_pVideoData = g_pVideoData;
u8* startAddress = Memory::GetPointer(address); u8* startAddress = Memory::GetPointer(address);
u32 cycles = 0;
// Avoid the crash if Memory::GetPointer failed .. // Avoid the crash if Memory::GetPointer failed ..
if (startAddress != nullptr) if (startAddress != nullptr)
{ {
@@ -91,10 +91,7 @@ void InterpretDisplayList(u32 address, u32 size)
Statistics::SwapDL(); Statistics::SwapDL();
u8 *end = g_pVideoData + size; u8 *end = g_pVideoData + size;
while (g_pVideoData < end) cycles = OpcodeDecoder_Run(false, end);
{
Decode();
}
INCSTAT(stats.thisFrame.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled);
// un-swap // un-swap
@@ -103,127 +100,23 @@ void InterpretDisplayList(u32 address, u32 size)
// reset to the old pointer // reset to the old pointer
g_pVideoData = old_pVideoData; g_pVideoData = old_pVideoData;
return cycles;
} }
static u32 FifoCommandRunnable(u32 &command_size) static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
{ {
u32 cycleTime = 0;
u32 buffer_size = (u32)(GetVideoBufferEndPtr() - g_pVideoData);
if (buffer_size == 0)
return 0; // can't peek
u8 cmd_byte = DataPeek8(0);
switch (cmd_byte)
{
case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly...
command_size = 1;
cycleTime = 6;
break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters
command_size = 1;
cycleTime = 6;
break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
command_size = 1;
cycleTime = 6;
break;
case GX_LOAD_BP_REG:
command_size = 5;
cycleTime = 12;
break;
case GX_LOAD_CP_REG:
command_size = 6;
cycleTime = 12;
break;
case GX_LOAD_INDX_A:
case GX_LOAD_INDX_B:
case GX_LOAD_INDX_C:
case GX_LOAD_INDX_D:
command_size = 5;
cycleTime = 6; // TODO
break;
case GX_CMD_CALL_DL:
{
// FIXME: Calculate the cycle time of the display list.
//u32 address = DataPeek32(1);
//u32 size = DataPeek32(5);
//u8* old_pVideoData = g_pVideoData;
//u8* startAddress = Memory::GetPointer(address);
//// Avoid the crash if Memory::GetPointer failed ..
//if (startAddress != 0)
//{
// g_pVideoData = startAddress;
// u8 *end = g_pVideoData + size;
// u32 step = 0;
// while (g_pVideoData < end)
// {
// cycleTime += FifoCommandRunnable(step);
// g_pVideoData += step;
// }
//}
//else
//{
// cycleTime = 45;
//}
//// reset to the old pointer
//g_pVideoData = old_pVideoData;
command_size = 9;
cycleTime = 45; // This is unverified
}
break;
case GX_LOAD_XF_REG:
{
// check if we can read the header
if (buffer_size >= 5)
{
command_size = 1 + 4;
u32 Cmd2 = DataPeek32(1);
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
command_size += transfer_size * 4;
cycleTime = 18 + 6 * transfer_size;
}
else
{
return 0;
}
}
break;
default:
if ((cmd_byte & 0xC0) == 0x80)
{
// check if we can read the header
if (buffer_size >= 3)
{
command_size = 1 + 2;
u16 numVertices = DataPeek16(1);
command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK);
cycleTime = 1600; // This depends on the number of pixels rendered
}
else
{
return 0;
}
}
else
{
// TODO(Omega): Maybe dump FIFO to file on this error // TODO(Omega): Maybe dump FIFO to file on this error
std::string temp = StringFromFormat( std::string temp = StringFromFormat(
"GFX FIFO: Unknown Opcode (0x%x).\n" "GFX FIFO: Unknown Opcode (0x%x @ %p).\n"
"This means one of the following:\n" "This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n" "* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n" "* Command stream corrupted by some spurious memory bug\n"
"* This really is an unknown opcode (unlikely)\n" "* This really is an unknown opcode (unlikely)\n"
"* Some other sort of bug\n\n" "* Some other sort of bug\n\n"
"Dolphin will now likely crash or hang. Enjoy." , cmd_byte); "Dolphin will now likely crash or hang. Enjoy." ,
cmd_byte,
buffer);
Host_SysMessage(temp.c_str()); Host_SysMessage(temp.c_str());
INFO_LOG(VIDEO, "%s", temp.c_str()); INFO_LOG(VIDEO, "%s", temp.c_str());
{ {
@@ -251,38 +144,27 @@ static u32 FifoCommandRunnable(u32 &command_size)
Host_SysMessage(tmp.c_str()); Host_SysMessage(tmp.c_str());
INFO_LOG(VIDEO, "%s", tmp.c_str()); INFO_LOG(VIDEO, "%s", tmp.c_str());
} }
}
break;
}
if (command_size > buffer_size)
return 0;
// INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size);
if (cycleTime == 0)
cycleTime = 6;
return cycleTime;
} }
static u32 FifoCommandRunnable() static u32 Decode(u8* end, bool skipped_frame)
{
u32 command_size = 0;
return FifoCommandRunnable(command_size);
}
static void Decode()
{ {
u8 *opcodeStart = g_pVideoData; u8 *opcodeStart = g_pVideoData;
if (g_pVideoData == end)
return 0;
int cmd_byte = DataReadU8(); u8 cmd_byte = DataReadU8();
u32 cycles;
switch (cmd_byte) switch (cmd_byte)
{ {
case GX_NOP: case GX_NOP:
cycles = 6; // Hm, this means that we scan over nop streams pretty slowly...
break; break;
case GX_LOAD_CP_REG: //0x08 case GX_LOAD_CP_REG: //0x08
{ {
if (end - g_pVideoData < 1 + 4)
return 0;
cycles = 12;
u8 sub_cmd = DataReadU8(); u8 sub_cmd = DataReadU8();
u32 value = DataReadU32(); u32 value = DataReadU32();
LoadCPReg(sub_cmd, value); LoadCPReg(sub_cmd, value);
@@ -292,8 +174,13 @@ static void Decode()
case GX_LOAD_XF_REG: case GX_LOAD_XF_REG:
{ {
if (end - g_pVideoData < 4)
return 0;
u32 Cmd2 = DataReadU32(); u32 Cmd2 = DataReadU32();
int transfer_size = ((Cmd2 >> 16) & 15) + 1; int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32))
return 0;
cycles = 18 + 6 * transfer_size;
u32 xf_address = Cmd2 & 0xFFFF; u32 xf_address = Cmd2 & 0xFFFF;
GC_ALIGNED128(u32 data_buffer[16]); GC_ALIGNED128(u32 data_buffer[16]);
DataReadU32xFuncs[transfer_size-1](data_buffer); DataReadU32xFuncs[transfer_size-1](data_buffer);
@@ -304,36 +191,60 @@ static void Decode()
break; break;
case GX_LOAD_INDX_A: //used for position matrices case GX_LOAD_INDX_A: //used for position matrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xC); LoadIndexedXF(DataReadU32(), 0xC);
break; break;
case GX_LOAD_INDX_B: //used for normal matrices case GX_LOAD_INDX_B: //used for normal matrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xD); LoadIndexedXF(DataReadU32(), 0xD);
break; break;
case GX_LOAD_INDX_C: //used for postmatrices case GX_LOAD_INDX_C: //used for postmatrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xE); LoadIndexedXF(DataReadU32(), 0xE);
break; break;
case GX_LOAD_INDX_D: //used for lights case GX_LOAD_INDX_D: //used for lights
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xF); LoadIndexedXF(DataReadU32(), 0xF);
break; break;
case GX_CMD_CALL_DL: case GX_CMD_CALL_DL:
{ {
if (end - g_pVideoData < 8)
return 0;
u32 address = DataReadU32(); u32 address = DataReadU32();
u32 count = DataReadU32(); u32 count = DataReadU32();
InterpretDisplayList(address, count); if (skipped_frame)
cycles = 45; // xxx
else
cycles = 6 + InterpretDisplayList(address, count);
} }
break; break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
cycles = 6;
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
break; break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache case GX_CMD_INVL_VC: // Invalidate Vertex Cache
cycles = 6;
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
break; break;
case GX_LOAD_BP_REG: //0x61 case GX_LOAD_BP_REG: //0x61
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{ {
if (end - g_pVideoData < 4)
return 0;
cycles = 12;
u32 bp_cmd = DataReadU32(); u32 bp_cmd = DataReadU32();
LoadBPReg(bp_cmd); LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads); INCSTAT(stats.thisFrame.numBPLoads);
@@ -344,18 +255,33 @@ static void Decode()
default: default:
if ((cmd_byte & 0xC0) == 0x80) if ((cmd_byte & 0xC0) == 0x80)
{ {
// load vertices (use computed vertex size from FifoCommandRunnable above) cycles = 1600;
// load vertices
if (end - g_pVideoData < 2)
return 0;
u16 numVertices = DataReadU16(); u16 numVertices = DataReadU16();
VertexLoaderManager::RunVertices( if (skipped_frame)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) {
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, size_t size = numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK);
numVertices); if ((size_t) (end - g_pVideoData) < size)
return 0;
DataSkip((u32)size);
} }
else else
{ {
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); if (!VertexLoaderManager::RunVertices(
break; cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices,
end - g_pVideoData))
return 0;
}
}
else
{
UnknownOpcode(cmd_byte, opcodeStart, false);
cycles = 1;
} }
break; break;
} }
@@ -363,89 +289,8 @@ static void Decode()
// Display lists get added directly into the FIFO stream // Display lists get added directly into the FIFO stream
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
}
static void DecodeSemiNop() return cycles;
{
u8 *opcodeStart = g_pVideoData;
int cmd_byte = DataReadU8();
switch (cmd_byte)
{
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
case GX_NOP:
break;
case GX_LOAD_CP_REG: //0x08
// We have to let CP writes through because they determine the size of vertices.
{
u8 sub_cmd = DataReadU8();
u32 value = DataReadU32();
LoadCPReg(sub_cmd, value);
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
case GX_LOAD_XF_REG:
{
u32 Cmd2 = DataReadU32();
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
u32 address = Cmd2 & 0xFFFF;
GC_ALIGNED128(u32 data_buffer[16]);
DataReadU32xFuncs[transfer_size-1](data_buffer);
LoadXFReg(transfer_size, address, data_buffer);
INCSTAT(stats.thisFrame.numXFLoads);
}
break;
case GX_LOAD_INDX_A: //used for position matrices
LoadIndexedXF(DataReadU32(), 0xC);
break;
case GX_LOAD_INDX_B: //used for normal matrices
LoadIndexedXF(DataReadU32(), 0xD);
break;
case GX_LOAD_INDX_C: //used for postmatrices
LoadIndexedXF(DataReadU32(), 0xE);
break;
case GX_LOAD_INDX_D: //used for lights
LoadIndexedXF(DataReadU32(), 0xF);
break;
case GX_CMD_CALL_DL:
// Hm, wonder if any games put tokens in display lists - in that case,
// we'll have to parse them too.
DataSkip(8);
break;
case GX_LOAD_BP_REG: //0x61
// We have to let BP writes through because they set tokens and stuff.
// TODO: Call a much simplified LoadBPReg instead.
{
u32 bp_cmd = DataReadU32();
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
break;
// draw primitives
default:
if ((cmd_byte & 0xC0) == 0x80)
{
// load vertices (use computed vertex size from FifoCommandRunnable above)
u16 numVertices = DataReadU16();
DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK));
}
else
{
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
break;
}
break;
}
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
} }
void OpcodeDecoder_Init() void OpcodeDecoder_Init()
@@ -466,15 +311,18 @@ void OpcodeDecoder_Shutdown()
{ {
} }
u32 OpcodeDecoder_Run(bool skipped_frame) u32 OpcodeDecoder_Run(bool skipped_frame, u8* end)
{ {
u32 totalCycles = 0; u32 totalCycles = 0;
while (true) while (true)
{ {
u32 cycles = FifoCommandRunnable(); u8* old = g_pVideoData;
u32 cycles = Decode(end, skipped_frame);
if (cycles == 0) if (cycles == 0)
{
g_pVideoData = old;
break; break;
skipped_frame ? DecodeSemiNop() : Decode(); }
totalCycles += cycles; totalCycles += cycles;
} }
return totalCycles; return totalCycles;

View File

@@ -38,5 +38,4 @@ extern bool g_bRecordFifoData;
void OpcodeDecoder_Init(); void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown(); void OpcodeDecoder_Shutdown();
u32 OpcodeDecoder_Run(bool skipped_frame); u32 OpcodeDecoder_Run(bool skipped_frame, u8* end);
void InterpretDisplayList(u32 address, u32 size);

View File

@@ -151,17 +151,21 @@ static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group)
return s_VertexLoaders[vtx_attr_group]; return s_VertexLoaders[vtx_attr_group];
} }
void RunVertices(int vtx_attr_group, int primitive, int count) bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size)
{ {
if (!count) if (!count)
return; return true;
auto loader = RefreshLoader(vtx_attr_group); auto loader = RefreshLoader(vtx_attr_group);
size_t size = count * loader.first->GetVertexSize();
if (buf_size < size)
return false;
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5) if (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5)
{ {
// if cull mode is CULL_ALL, ignore triangles and quads // if cull mode is CULL_ALL, ignore triangles and quads
DataSkip(count * loader.first->GetVertexSize()); DataSkip((u32)size);
return; return true;
} }
// If the native vertex format changed, force a flush. // If the native vertex format changed, force a flush.
@@ -178,6 +182,7 @@ void RunVertices(int vtx_attr_group, int primitive, int count)
ADDSTAT(stats.thisFrame.numPrims, count); ADDSTAT(stats.thisFrame.numPrims, count);
INCSTAT(stats.thisFrame.numPrimitiveJoins); INCSTAT(stats.thisFrame.numPrimitiveJoins);
return true;
} }
int GetVertexSize(int vtx_attr_group) int GetVertexSize(int vtx_attr_group)

View File

@@ -17,7 +17,8 @@ namespace VertexLoaderManager
void MarkAllDirty(); void MarkAllDirty();
int GetVertexSize(int vtx_attr_group); int GetVertexSize(int vtx_attr_group);
void RunVertices(int vtx_attr_group, int primitive, int count); // Returns false if buf_size is insufficient.
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size);
// For debugging // For debugging
void AppendListToString(std::string *dest); void AppendListToString(std::string *dest);