diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 165cd8e239..fb47c73e49 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -173,7 +173,7 @@ void RunGpuLoop() ReadDataFromFifo(uData, 32); - cyclesExecuted = OpcodeDecoder_Run(g_bSkipCurrentFrame); + cyclesExecuted = OpcodeDecoder_Run(g_bSkipCurrentFrame, GetVideoBufferEndPtr()); if (Core::g_CoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) > cyclesExecuted) Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted); @@ -235,7 +235,7 @@ void RunGpu() FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); ReadDataFromFifo(uData, 32); - OpcodeDecoder_Run(g_bSkipCurrentFrame); + OpcodeDecoder_Run(g_bSkipCurrentFrame, GetVideoBufferEndPtr()); FPURoundMode::LoadSIMDState(); //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index ff9cd8e735..fe644db21e 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -75,13 +75,13 @@ static DataReadU32xNfunc DataReadU32xFuncs[16] = { DataReadU32xN<16> }; -static void Decode(); - -void InterpretDisplayList(u32 address, u32 size) +static u32 InterpretDisplayList(u32 address, u32 size) { u8* old_pVideoData = g_pVideoData; u8* startAddress = Memory::GetPointer(address); + u32 cycles = 0; + // Avoid the crash if Memory::GetPointer failed .. if (startAddress != nullptr) { @@ -91,10 +91,7 @@ void InterpretDisplayList(u32 address, u32 size) Statistics::SwapDL(); u8 *end = g_pVideoData + size; - while (g_pVideoData < end) - { - Decode(); - } + cycles = OpcodeDecoder_Run(false, end); INCSTAT(stats.thisFrame.numDListsCalled); // un-swap @@ -103,186 +100,71 @@ void InterpretDisplayList(u32 address, u32 size) // reset to the old pointer g_pVideoData = old_pVideoData; + + return cycles; } -static u32 FifoCommandRunnable(u32 &command_size) +static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) { - u32 cycleTime = 0; - u32 buffer_size = (u32)(GetVideoBufferEndPtr() - g_pVideoData); - if (buffer_size == 0) - return 0; // can't peek - - u8 cmd_byte = DataPeek8(0); - - switch (cmd_byte) + // TODO(Omega): Maybe dump FIFO to file on this error + std::string temp = StringFromFormat( + "GFX FIFO: Unknown Opcode (0x%x @ %p).\n" + "This means one of the following:\n" + "* The emulated GPU got desynced, disabling dual core can help\n" + "* Command stream corrupted by some spurious memory bug\n" + "* This really is an unknown opcode (unlikely)\n" + "* Some other sort of bug\n\n" + "Dolphin will now likely crash or hang. Enjoy." , + cmd_byte, + buffer); + Host_SysMessage(temp.c_str()); + INFO_LOG(VIDEO, "%s", temp.c_str()); { - case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly... - command_size = 1; - cycleTime = 6; - break; - case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters - command_size = 1; - cycleTime = 6; - break; - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that - command_size = 1; - cycleTime = 6; - break; + SCPFifoStruct &fifo = CommandProcessor::fifo; - case GX_LOAD_BP_REG: - command_size = 5; - cycleTime = 12; - break; + std::string tmp = StringFromFormat( + "Illegal command %02x\n" + "CPBase: 0x%08x\n" + "CPEnd: 0x%08x\n" + "CPHiWatermark: 0x%08x\n" + "CPLoWatermark: 0x%08x\n" + "CPReadWriteDistance: 0x%08x\n" + "CPWritePointer: 0x%08x\n" + "CPReadPointer: 0x%08x\n" + "CPBreakpoint: 0x%08x\n" + "bFF_GPReadEnable: %s\n" + "bFF_BPEnable: %s\n" + "bFF_BPInt: %s\n" + "bFF_Breakpoint: %s\n" + ,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance + ,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false" + ,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_BPInt ? "true" : "false" + ,fifo.bFF_Breakpoint ? "true" : "false"); - case GX_LOAD_CP_REG: - command_size = 6; - cycleTime = 12; - break; - - case GX_LOAD_INDX_A: - case GX_LOAD_INDX_B: - case GX_LOAD_INDX_C: - case GX_LOAD_INDX_D: - command_size = 5; - cycleTime = 6; // TODO - break; - - case GX_CMD_CALL_DL: - { - // FIXME: Calculate the cycle time of the display list. - //u32 address = DataPeek32(1); - //u32 size = DataPeek32(5); - //u8* old_pVideoData = g_pVideoData; - //u8* startAddress = Memory::GetPointer(address); - - //// Avoid the crash if Memory::GetPointer failed .. - //if (startAddress != 0) - //{ - // g_pVideoData = startAddress; - // u8 *end = g_pVideoData + size; - // u32 step = 0; - // while (g_pVideoData < end) - // { - // cycleTime += FifoCommandRunnable(step); - // g_pVideoData += step; - // } - //} - //else - //{ - // cycleTime = 45; - //} - - //// reset to the old pointer - //g_pVideoData = old_pVideoData; - command_size = 9; - cycleTime = 45; // This is unverified - } - break; - - case GX_LOAD_XF_REG: - { - // check if we can read the header - if (buffer_size >= 5) - { - command_size = 1 + 4; - u32 Cmd2 = DataPeek32(1); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - command_size += transfer_size * 4; - cycleTime = 18 + 6 * transfer_size; - } - else - { - return 0; - } - } - break; - - default: - if ((cmd_byte & 0xC0) == 0x80) - { - // check if we can read the header - if (buffer_size >= 3) - { - command_size = 1 + 2; - u16 numVertices = DataPeek16(1); - command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK); - cycleTime = 1600; // This depends on the number of pixels rendered - } - else - { - return 0; - } - } - else - { - // TODO(Omega): Maybe dump FIFO to file on this error - std::string temp = StringFromFormat( - "GFX FIFO: Unknown Opcode (0x%x).\n" - "This means one of the following:\n" - "* The emulated GPU got desynced, disabling dual core can help\n" - "* Command stream corrupted by some spurious memory bug\n" - "* This really is an unknown opcode (unlikely)\n" - "* Some other sort of bug\n\n" - "Dolphin will now likely crash or hang. Enjoy." , cmd_byte); - Host_SysMessage(temp.c_str()); - INFO_LOG(VIDEO, "%s", temp.c_str()); - { - SCPFifoStruct &fifo = CommandProcessor::fifo; - - std::string tmp = StringFromFormat( - "Illegal command %02x\n" - "CPBase: 0x%08x\n" - "CPEnd: 0x%08x\n" - "CPHiWatermark: 0x%08x\n" - "CPLoWatermark: 0x%08x\n" - "CPReadWriteDistance: 0x%08x\n" - "CPWritePointer: 0x%08x\n" - "CPReadPointer: 0x%08x\n" - "CPBreakpoint: 0x%08x\n" - "bFF_GPReadEnable: %s\n" - "bFF_BPEnable: %s\n" - "bFF_BPInt: %s\n" - "bFF_Breakpoint: %s\n" - ,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance - ,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false" - ,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_BPInt ? "true" : "false" - ,fifo.bFF_Breakpoint ? "true" : "false"); - - Host_SysMessage(tmp.c_str()); - INFO_LOG(VIDEO, "%s", tmp.c_str()); - } - } - break; + Host_SysMessage(tmp.c_str()); + INFO_LOG(VIDEO, "%s", tmp.c_str()); } - - if (command_size > buffer_size) - return 0; - - // INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size); - if (cycleTime == 0) - cycleTime = 6; - - return cycleTime; } -static u32 FifoCommandRunnable() -{ - u32 command_size = 0; - return FifoCommandRunnable(command_size); -} - -static void Decode() +static u32 Decode(u8* end, bool skipped_frame) { u8 *opcodeStart = g_pVideoData; + if (g_pVideoData == end) + return 0; - int cmd_byte = DataReadU8(); + u8 cmd_byte = DataReadU8(); + u32 cycles; switch (cmd_byte) { case GX_NOP: + cycles = 6; // Hm, this means that we scan over nop streams pretty slowly... break; case GX_LOAD_CP_REG: //0x08 { + if (end - g_pVideoData < 1 + 4) + return 0; + cycles = 12; u8 sub_cmd = DataReadU8(); u32 value = DataReadU32(); LoadCPReg(sub_cmd, value); @@ -292,8 +174,13 @@ static void Decode() case GX_LOAD_XF_REG: { + if (end - g_pVideoData < 4) + return 0; u32 Cmd2 = DataReadU32(); int transfer_size = ((Cmd2 >> 16) & 15) + 1; + if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32)) + return 0; + cycles = 18 + 6 * transfer_size; u32 xf_address = Cmd2 & 0xFFFF; GC_ALIGNED128(u32 data_buffer[16]); DataReadU32xFuncs[transfer_size-1](data_buffer); @@ -304,36 +191,60 @@ static void Decode() break; case GX_LOAD_INDX_A: //used for position matrices + if (end - g_pVideoData < 4) + return 0; + cycles = 6; LoadIndexedXF(DataReadU32(), 0xC); break; case GX_LOAD_INDX_B: //used for normal matrices + if (end - g_pVideoData < 4) + return 0; + cycles = 6; LoadIndexedXF(DataReadU32(), 0xD); break; case GX_LOAD_INDX_C: //used for postmatrices + if (end - g_pVideoData < 4) + return 0; + cycles = 6; LoadIndexedXF(DataReadU32(), 0xE); break; case GX_LOAD_INDX_D: //used for lights + if (end - g_pVideoData < 4) + return 0; + cycles = 6; LoadIndexedXF(DataReadU32(), 0xF); break; case GX_CMD_CALL_DL: { + if (end - g_pVideoData < 8) + return 0; u32 address = DataReadU32(); u32 count = DataReadU32(); - InterpretDisplayList(address, count); + if (skipped_frame) + cycles = 45; // xxx + else + cycles = 6 + InterpretDisplayList(address, count); } break; case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that + cycles = 6; DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); break; case GX_CMD_INVL_VC: // Invalidate Vertex Cache + cycles = 6; DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); break; case GX_LOAD_BP_REG: //0x61 + // In skipped_frame case: We have to let BP writes through because they set + // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. { + if (end - g_pVideoData < 4) + return 0; + cycles = 12; u32 bp_cmd = DataReadU32(); LoadBPReg(bp_cmd); INCSTAT(stats.thisFrame.numBPLoads); @@ -344,18 +255,33 @@ static void Decode() default: if ((cmd_byte & 0xC0) == 0x80) { - // load vertices (use computed vertex size from FifoCommandRunnable above) + cycles = 1600; + // load vertices + if (end - g_pVideoData < 2) + return 0; u16 numVertices = DataReadU16(); - VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - numVertices); + if (skipped_frame) + { + size_t size = numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK); + if ((size_t) (end - g_pVideoData) < size) + return 0; + DataSkip((u32)size); + } + else + { + if (!VertexLoaderManager::RunVertices( + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + numVertices, + end - g_pVideoData)) + return 0; + } } else { - ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); - break; + UnknownOpcode(cmd_byte, opcodeStart, false); + cycles = 1; } break; } @@ -363,89 +289,8 @@ static void Decode() // Display lists get added directly into the FIFO stream if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); -} -static void DecodeSemiNop() -{ - u8 *opcodeStart = g_pVideoData; - - int cmd_byte = DataReadU8(); - switch (cmd_byte) - { - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that - case GX_CMD_INVL_VC: // Invalidate Vertex Cache - case GX_NOP: - break; - - case GX_LOAD_CP_REG: //0x08 - // We have to let CP writes through because they determine the size of vertices. - { - u8 sub_cmd = DataReadU8(); - u32 value = DataReadU32(); - LoadCPReg(sub_cmd, value); - INCSTAT(stats.thisFrame.numCPLoads); - } - break; - - case GX_LOAD_XF_REG: - { - u32 Cmd2 = DataReadU32(); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - u32 address = Cmd2 & 0xFFFF; - GC_ALIGNED128(u32 data_buffer[16]); - DataReadU32xFuncs[transfer_size-1](data_buffer); - LoadXFReg(transfer_size, address, data_buffer); - INCSTAT(stats.thisFrame.numXFLoads); - } - break; - - case GX_LOAD_INDX_A: //used for position matrices - LoadIndexedXF(DataReadU32(), 0xC); - break; - case GX_LOAD_INDX_B: //used for normal matrices - LoadIndexedXF(DataReadU32(), 0xD); - break; - case GX_LOAD_INDX_C: //used for postmatrices - LoadIndexedXF(DataReadU32(), 0xE); - break; - case GX_LOAD_INDX_D: //used for lights - LoadIndexedXF(DataReadU32(), 0xF); - break; - - case GX_CMD_CALL_DL: - // Hm, wonder if any games put tokens in display lists - in that case, - // we'll have to parse them too. - DataSkip(8); - break; - - case GX_LOAD_BP_REG: //0x61 - // We have to let BP writes through because they set tokens and stuff. - // TODO: Call a much simplified LoadBPReg instead. - { - u32 bp_cmd = DataReadU32(); - LoadBPReg(bp_cmd); - INCSTAT(stats.thisFrame.numBPLoads); - } - break; - - // draw primitives - default: - if ((cmd_byte & 0xC0) == 0x80) - { - // load vertices (use computed vertex size from FifoCommandRunnable above) - u16 numVertices = DataReadU16(); - DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK)); - } - else - { - ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); - break; - } - break; - } - - if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) - FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); + return cycles; } void OpcodeDecoder_Init() @@ -466,15 +311,18 @@ void OpcodeDecoder_Shutdown() { } -u32 OpcodeDecoder_Run(bool skipped_frame) +u32 OpcodeDecoder_Run(bool skipped_frame, u8* end) { u32 totalCycles = 0; while (true) { - u32 cycles = FifoCommandRunnable(); + u8* old = g_pVideoData; + u32 cycles = Decode(end, skipped_frame); if (cycles == 0) + { + g_pVideoData = old; break; - skipped_frame ? DecodeSemiNop() : Decode(); + } totalCycles += cycles; } return totalCycles; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index baf44bd40f..2944863bbc 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -38,5 +38,4 @@ extern bool g_bRecordFifoData; void OpcodeDecoder_Init(); void OpcodeDecoder_Shutdown(); -u32 OpcodeDecoder_Run(bool skipped_frame); -void InterpretDisplayList(u32 address, u32 size); +u32 OpcodeDecoder_Run(bool skipped_frame, u8* end); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index b74fc029db..7ff21d103b 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -151,17 +151,21 @@ static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group) return s_VertexLoaders[vtx_attr_group]; } -void RunVertices(int vtx_attr_group, int primitive, int count) +bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size) { if (!count) - return; + return true; auto loader = RefreshLoader(vtx_attr_group); + size_t size = count * loader.first->GetVertexSize(); + if (buf_size < size) + return false; + if (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5) { // if cull mode is CULL_ALL, ignore triangles and quads - DataSkip(count * loader.first->GetVertexSize()); - return; + DataSkip((u32)size); + return true; } // If the native vertex format changed, force a flush. @@ -178,6 +182,7 @@ void RunVertices(int vtx_attr_group, int primitive, int count) ADDSTAT(stats.thisFrame.numPrims, count); INCSTAT(stats.thisFrame.numPrimitiveJoins); + return true; } int GetVertexSize(int vtx_attr_group) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index 17bf13c3ba..ff3485b9d5 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -17,7 +17,8 @@ namespace VertexLoaderManager void MarkAllDirty(); int GetVertexSize(int vtx_attr_group); - void RunVertices(int vtx_attr_group, int primitive, int count); + // Returns false if buf_size is insufficient. + bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size); // For debugging void AppendListToString(std::string *dest);