// Copyright (C) 2003 Dolphin Project. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. // If not, see http://www.gnu.org/licenses/ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ //DL facts: // Ikaruga uses (nearly) NO display lists! // Zelda WW uses TONS of display lists // Zelda TP uses almost 100% display lists except menus (we like this!) // Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!) // Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they are // while interpreting them, and hope that the vertex format doesn't change, though, if you do it right // when they are called. The reason is that the vertex format affects the sizes of the vertices. #include "Common.h" #include "VideoCommon.h" #include "OpcodeDecoding.h" #include "CommandProcessor.h" #include "CPUDetect.h" #include "Core.h" #include "Host.h" #include "HW/Memmap.h" #include "FifoPlayer/FifoRecorder.h" #include "VertexLoaderManager.h" #include "Statistics.h" #include "XFMemory.h" #include "CPMemory.h" #include "BPMemory.h" #include "Fifo.h" #include "DataReader.h" #include "OpenCL.h" #include "OpenCL/OCLTextureDecoder.h" #include "VideoConfig.h" u8* g_pVideoData = 0; bool g_bRecordFifoData = false; #if _M_SSE >= 0x301 DataReadU32xNfunc DataReadU32xFuncs_SSSE3[16] = { DataReadU32xN_SSSE3<1>, DataReadU32xN_SSSE3<2>, DataReadU32xN_SSSE3<3>, DataReadU32xN_SSSE3<4>, DataReadU32xN_SSSE3<5>, DataReadU32xN_SSSE3<6>, DataReadU32xN_SSSE3<7>, DataReadU32xN_SSSE3<8>, DataReadU32xN_SSSE3<9>, DataReadU32xN_SSSE3<10>, DataReadU32xN_SSSE3<11>, DataReadU32xN_SSSE3<12>, DataReadU32xN_SSSE3<13>, DataReadU32xN_SSSE3<14>, DataReadU32xN_SSSE3<15>, DataReadU32xN_SSSE3<16> }; #endif DataReadU32xNfunc DataReadU32xFuncs[16] = { DataReadU32xN<1>, DataReadU32xN<2>, DataReadU32xN<3>, DataReadU32xN<4>, DataReadU32xN<5>, DataReadU32xN<6>, DataReadU32xN<7>, DataReadU32xN<8>, DataReadU32xN<9>, DataReadU32xN<10>, DataReadU32xN<11>, DataReadU32xN<12>, DataReadU32xN<13>, DataReadU32xN<14>, DataReadU32xN<15>, DataReadU32xN<16> }; extern u8* GetVideoBufferStartPtr(); extern u8* GetVideoBufferEndPtr(); static void Decode(); void InterpretDisplayList(u32 address, u32 size) { u8* old_pVideoData = g_pVideoData; u8* startAddress = Memory::GetPointer(address); // Avoid the crash if Memory::GetPointer failed .. if (startAddress != 0) { g_pVideoData = startAddress; // temporarily swap dl and non-dl (small "hack" for the stats) Statistics::SwapDL(); u8 *end = g_pVideoData + size; while (g_pVideoData < end) { Decode(); } INCSTAT(stats.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled); // un-swap Statistics::SwapDL(); } // reset to the old pointer g_pVideoData = old_pVideoData; } // Defer to backend-specific DL cache. extern bool HandleDisplayList(u32 address, u32 size); void ExecuteDisplayList(u32 address, u32 size) { if (!HandleDisplayList(address, size)) InterpretDisplayList(address, size); } bool FifoCommandRunnable() { u32 buffer_size = (u32)(GetVideoBufferEndPtr() - g_pVideoData); if (buffer_size == 0) return false; // can't peek u8 cmd_byte = DataPeek8(0); u32 command_size = 0; switch (cmd_byte) { case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly... case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that command_size = 1; break; case GX_LOAD_BP_REG: command_size = 5; break; case GX_LOAD_CP_REG: command_size = 6; break; case GX_LOAD_INDX_A: case GX_LOAD_INDX_B: case GX_LOAD_INDX_C: case GX_LOAD_INDX_D: command_size = 5; break; case GX_CMD_CALL_DL: command_size = 9; break; case GX_LOAD_XF_REG: { // check if we can read the header if (buffer_size >= 5) { command_size = 1 + 4; u32 Cmd2 = DataPeek32(1); int transfer_size = ((Cmd2 >> 16) & 15) + 1; command_size += transfer_size * 4; } else { return false; } } break; default: if (cmd_byte & 0x80) { // check if we can read the header if (buffer_size >= 3) { command_size = 1 + 2; u16 numVertices = DataPeek16(1); command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK); } else { return false; } } else { // TODO(Omega): Maybe dump FIFO to file on this error char szTemp[1024]; sprintf(szTemp, "GFX FIFO: Unknown Opcode (0x%x).\n" "This means one of the following:\n" "* The emulated GPU got desynced, disabling dual core can help\n" "* Command stream corrupted by some spurious memory bug\n" "* This really is an unknown opcode (unlikely)\n" "* Some other sort of bug\n\n" "Dolphin will now likely crash or hang. Enjoy." , cmd_byte); Host_SysMessage(szTemp); INFO_LOG(VIDEO, "%s", szTemp); { SCPFifoStruct &fifo = CommandProcessor::fifo; char szTmp[512]; // sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr()); sprintf(szTmp, "Illegal command %02x\n" "CPBase: 0x%08x\n" "CPEnd: 0x%08x\n" "CPHiWatermark: 0x%08x\n" "CPLoWatermark: 0x%08x\n" "CPReadWriteDistance: 0x%08x\n" "CPWritePointer: 0x%08x\n" "CPReadPointer: 0x%08x\n" "CPBreakpoint: 0x%08x\n" "bFF_GPReadEnable: %s\n" "bFF_BPEnable: %s\n" "bFF_BPInt: %s\n" "bFF_Breakpoint: %s\n" ,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance ,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false" ,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_BPInt ? "true" : "false" ,fifo.bFF_Breakpoint ? "true" : "false"); Host_SysMessage(szTmp); INFO_LOG(VIDEO, "%s", szTmp); } } break; } if (command_size > buffer_size) return false; // INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size); return true; } static void Decode() { u8 *opcodeStart = g_pVideoData; int cmd_byte = DataReadU8(); switch (cmd_byte) { case GX_NOP: break; case GX_LOAD_CP_REG: //0x08 { u8 sub_cmd = DataReadU8(); u32 value = DataReadU32(); LoadCPReg(sub_cmd, value); INCSTAT(stats.thisFrame.numCPLoads); } break; case GX_LOAD_XF_REG: { u32 Cmd2 = DataReadU32(); int transfer_size = ((Cmd2 >> 16) & 15) + 1; u32 xf_address = Cmd2 & 0xFFFF; GC_ALIGNED128(u32 data_buffer[16]); DataReadU32xFuncs[transfer_size-1](data_buffer); LoadXFReg(transfer_size, xf_address, data_buffer); INCSTAT(stats.thisFrame.numXFLoads); } break; case GX_LOAD_INDX_A: //used for position matrices LoadIndexedXF(DataReadU32(), 0xC); break; case GX_LOAD_INDX_B: //used for normal matrices LoadIndexedXF(DataReadU32(), 0xD); break; case GX_LOAD_INDX_C: //used for postmatrices LoadIndexedXF(DataReadU32(), 0xE); break; case GX_LOAD_INDX_D: //used for lights LoadIndexedXF(DataReadU32(), 0xF); break; case GX_CMD_CALL_DL: { u32 address = DataReadU32(); u32 count = DataReadU32(); ExecuteDisplayList(address, count); } break; case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); break; case GX_CMD_INVL_VC: // Invalidate Vertex Cache DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); break; case GX_LOAD_BP_REG: //0x61 { u32 bp_cmd = DataReadU32(); LoadBPReg(bp_cmd); INCSTAT(stats.thisFrame.numBPLoads); } break; // draw primitives default: if (cmd_byte & 0x80) { // load vertices (use computed vertex size from FifoCommandRunnable above) u16 numVertices = DataReadU16(); VertexLoaderManager::RunVertices( cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices); } else { ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); break; } break; } // Display lists get added directly into the FIFO stream if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); } static void DecodeSemiNop() { u8 *opcodeStart = g_pVideoData; int cmd_byte = DataReadU8(); switch (cmd_byte) { case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that case GX_CMD_INVL_VC: // Invalidate Vertex Cache case GX_NOP: break; case GX_LOAD_CP_REG: //0x08 // We have to let CP writes through because they determine the size of vertices. { u8 sub_cmd = DataReadU8(); u32 value = DataReadU32(); LoadCPReg(sub_cmd, value); INCSTAT(stats.thisFrame.numCPLoads); } break; case GX_LOAD_XF_REG: { u32 Cmd2 = DataReadU32(); int transfer_size = ((Cmd2 >> 16) & 15) + 1; u32 address = Cmd2 & 0xFFFF; GC_ALIGNED128(u32 data_buffer[16]); DataReadU32xFuncs[transfer_size-1](data_buffer); LoadXFReg(transfer_size, address, data_buffer); INCSTAT(stats.thisFrame.numXFLoads); } break; case GX_LOAD_INDX_A: //used for position matrices LoadIndexedXF(DataReadU32(), 0xC); break; case GX_LOAD_INDX_B: //used for normal matrices LoadIndexedXF(DataReadU32(), 0xD); break; case GX_LOAD_INDX_C: //used for postmatrices LoadIndexedXF(DataReadU32(), 0xE); break; case GX_LOAD_INDX_D: //used for lights LoadIndexedXF(DataReadU32(), 0xF); break; case GX_CMD_CALL_DL: // Hm, wonder if any games put tokens in display lists - in that case, // we'll have to parse them too. DataSkip(8); break; case GX_LOAD_BP_REG: //0x61 // We have to let BP writes through because they set tokens and stuff. // TODO: Call a much simplified LoadBPReg instead. { u32 bp_cmd = DataReadU32(); LoadBPReg(bp_cmd); INCSTAT(stats.thisFrame.numBPLoads); } break; // draw primitives default: if (cmd_byte & 0x80) { // load vertices (use computed vertex size from FifoCommandRunnable above) u16 numVertices = DataReadU16(); DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK)); } else { ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); break; } break; } if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); } void OpcodeDecoder_Init() { g_pVideoData = GetVideoBufferStartPtr(); #if _M_SSE >= 0x301 if (cpu_info.bSSSE3) { for (int i = 0; i < 16; ++i) DataReadU32xFuncs[i] = DataReadU32xFuncs_SSSE3[i]; } #endif if (g_Config.bEnableOpenCL) { OpenCL::Initialize(); TexDecoder_OpenCL_Initialize(); } } void OpcodeDecoder_Shutdown() { if (g_Config.bEnableOpenCL) { TexDecoder_OpenCL_Shutdown(); OpenCL::Destroy(); } } void OpcodeDecoder_Run(bool skipped_frame) { if (!skipped_frame) { while (FifoCommandRunnable()) Decode(); } else { while (FifoCommandRunnable()) DecodeSemiNop(); } }