diff --git a/Source/Core/Common/EnumFormatter.h b/Source/Core/Common/EnumFormatter.h index 1ab6bbeadd..26cc910438 100644 --- a/Source/Core/Common/EnumFormatter.h +++ b/Source/Core/Common/EnumFormatter.h @@ -55,9 +55,9 @@ public: constexpr auto parse(fmt::format_parse_context& ctx) { auto it = ctx.begin(), end = ctx.end(); - // 'u' for user display, 's' for shader generation - if (it != end && (*it == 'u' || *it == 's')) - formatting_for_shader = (*it++ == 's'); + // 'u' for user display, 's' for shader generation, 'n' for name only + if (it != end && (*it == 'u' || *it == 's' || *it == 'n')) + format_type = *it++; return it; } @@ -68,19 +68,24 @@ public: const auto value_u = static_cast>(value_s); // Always unsigned const bool has_name = m_names.InBounds(e) && m_names[e] != nullptr; - if (!formatting_for_shader) + switch (format_type) { + default: + case 'u': if (has_name) return fmt::format_to(ctx.out(), "{} ({})", m_names[e], value_s); else return fmt::format_to(ctx.out(), "Invalid ({})", value_s); - } - else - { + case 's': if (has_name) return fmt::format_to(ctx.out(), "{:#x}u /* {} */", value_u, m_names[e]); else return fmt::format_to(ctx.out(), "{:#x}u /* Invalid */", value_u); + case 'n': + if (has_name) + return fmt::format_to(ctx.out(), "{}", m_names[e]); + else + return fmt::format_to(ctx.out(), "Invalid ({})", value_s); } } @@ -92,5 +97,5 @@ protected: private: const array_type m_names; - bool formatting_for_shader = false; + char format_type = 'u'; }; diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index a06a391c06..c0608572df 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -103,16 +103,10 @@ add_library(core DSP/LabelMap.h DSPEmulator.cpp DSPEmulator.h - FifoPlayer/FifoAnalyzer.cpp - FifoPlayer/FifoAnalyzer.h FifoPlayer/FifoDataFile.cpp FifoPlayer/FifoDataFile.h - FifoPlayer/FifoPlaybackAnalyzer.cpp - FifoPlayer/FifoPlaybackAnalyzer.h FifoPlayer/FifoPlayer.cpp FifoPlayer/FifoPlayer.h 
- FifoPlayer/FifoRecordAnalyzer.cpp - FifoPlayer/FifoRecordAnalyzer.h FifoPlayer/FifoRecorder.cpp FifoPlayer/FifoRecorder.h FreeLookConfig.cpp diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp deleted file mode 100644 index 3ac65e9999..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoAnalyzer.h" - -#include - -#include "Common/Assert.h" -#include "Common/MsgHandler.h" -#include "Common/Swap.h" - -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" - -#include "VideoCommon/OpcodeDecoding.h" -#include "VideoCommon/VertexLoader.h" -#include "VideoCommon/VertexLoader_Normal.h" -#include "VideoCommon/VertexLoader_Position.h" -#include "VideoCommon/VertexLoader_TextCoord.h" - -namespace FifoAnalyzer -{ -namespace -{ -u8 ReadFifo8(const u8*& data) -{ - const u8 value = data[0]; - data += 1; - return value; -} - -u16 ReadFifo16(const u8*& data) -{ - const u16 value = Common::swap16(data); - data += 2; - return value; -} - -u32 ReadFifo32(const u8*& data) -{ - const u32 value = Common::swap32(data); - data += 4; - return value; -} - -std::array CalculateVertexElementSizes(int vatIndex, const CPMemory& cpMem) -{ - const TVtxDesc& vtxDesc = cpMem.vtxDesc; - const VAT& vtxAttr = cpMem.vtxAttr[vatIndex]; - - // Colors - const std::array colComp{ - vtxAttr.g0.Color0Comp, - vtxAttr.g0.Color1Comp, - }; - - const std::array tcElements{ - vtxAttr.g0.Tex0CoordElements, vtxAttr.g1.Tex1CoordElements, vtxAttr.g1.Tex2CoordElements, - vtxAttr.g1.Tex3CoordElements, vtxAttr.g1.Tex4CoordElements, vtxAttr.g2.Tex5CoordElements, - vtxAttr.g2.Tex6CoordElements, vtxAttr.g2.Tex7CoordElements, - }; - const std::array tcFormat{ - vtxAttr.g0.Tex0CoordFormat, vtxAttr.g1.Tex1CoordFormat, vtxAttr.g1.Tex2CoordFormat, - vtxAttr.g1.Tex3CoordFormat, vtxAttr.g1.Tex4CoordFormat, 
vtxAttr.g2.Tex5CoordFormat, - vtxAttr.g2.Tex6CoordFormat, vtxAttr.g2.Tex7CoordFormat, - }; - - std::array sizes{}; - - // Add position and texture matrix indices - sizes[0] = vtxDesc.low.PosMatIdx; - for (size_t i = 0; i < vtxDesc.low.TexMatIdx.Size(); ++i) - { - sizes[i + 1] = vtxDesc.low.TexMatIdx[i]; - } - - // Position - sizes[9] = VertexLoader_Position::GetSize(vtxDesc.low.Position, vtxAttr.g0.PosFormat, - vtxAttr.g0.PosElements); - - // Normals - if (vtxDesc.low.Normal != VertexComponentFormat::NotPresent) - { - sizes[10] = VertexLoader_Normal::GetSize(vtxDesc.low.Normal, vtxAttr.g0.NormalFormat, - vtxAttr.g0.NormalElements, vtxAttr.g0.NormalIndex3); - } - else - { - sizes[10] = 0; - } - - // Colors - for (size_t i = 0; i < vtxDesc.low.Color.Size(); i++) - { - int size = 0; - - switch (vtxDesc.low.Color[i]) - { - case VertexComponentFormat::NotPresent: - break; - case VertexComponentFormat::Direct: - switch (colComp[i]) - { - case ColorFormat::RGB565: - size = 2; - break; - case ColorFormat::RGB888: - size = 3; - break; - case ColorFormat::RGB888x: - size = 4; - break; - case ColorFormat::RGBA4444: - size = 2; - break; - case ColorFormat::RGBA6666: - size = 3; - break; - case ColorFormat::RGBA8888: - size = 4; - break; - default: - ASSERT(0); - break; - } - break; - case VertexComponentFormat::Index8: - size = 1; - break; - case VertexComponentFormat::Index16: - size = 2; - break; - } - - sizes[11 + i] = size; - } - - // Texture coordinates - for (size_t i = 0; i < tcFormat.size(); i++) - { - sizes[13 + i] = - VertexLoader_TextCoord::GetSize(vtxDesc.high.TexCoord[i], tcFormat[i], tcElements[i]); - } - - return sizes; -} -} // Anonymous namespace - -bool s_DrawingObject; -FifoAnalyzer::CPMemory s_CpMem; - -u32 AnalyzeCommand(const u8* data, DecodeMode mode) -{ - const u8* dataStart = data; - - int cmd = ReadFifo8(data); - - switch (cmd) - { - case OpcodeDecoder::GX_NOP: - case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS: - case OpcodeDecoder::GX_CMD_INVL_VC: - 
break; - - case OpcodeDecoder::GX_LOAD_CP_REG: - { - s_DrawingObject = false; - - u32 cmd2 = ReadFifo8(data); - u32 value = ReadFifo32(data); - LoadCPReg(cmd2, value, s_CpMem); - break; - } - - case OpcodeDecoder::GX_LOAD_XF_REG: - { - s_DrawingObject = false; - - u32 cmd2 = ReadFifo32(data); - u8 streamSize = ((cmd2 >> 16) & 15) + 1; - - data += streamSize * 4; - break; - } - - case OpcodeDecoder::GX_LOAD_INDX_A: - case OpcodeDecoder::GX_LOAD_INDX_B: - case OpcodeDecoder::GX_LOAD_INDX_C: - case OpcodeDecoder::GX_LOAD_INDX_D: - { - s_DrawingObject = false; - - int array = 0xc + (cmd - OpcodeDecoder::GX_LOAD_INDX_A) / 8; - u32 value = ReadFifo32(data); - - if (mode == DecodeMode::Record) - FifoRecordAnalyzer::ProcessLoadIndexedXf(value, array); - break; - } - - case OpcodeDecoder::GX_CMD_CALL_DL: - // The recorder should have expanded display lists into the fifo stream and skipped the call to - // start them - // That is done to make it easier to track where memory is updated - ASSERT(false); - data += 8; - break; - - case OpcodeDecoder::GX_LOAD_BP_REG: - { - s_DrawingObject = false; - ReadFifo32(data); - break; - } - - default: - if (cmd & 0x80) - { - s_DrawingObject = true; - - const std::array sizes = - CalculateVertexElementSizes(cmd & OpcodeDecoder::GX_VAT_MASK, s_CpMem); - - // Determine offset of each element that might be a vertex array - // The first 9 elements are never vertex arrays so we just accumulate their sizes. 
- int offset = std::accumulate(sizes.begin(), sizes.begin() + 9, 0u); - std::array offsets; - for (size_t i = 0; i < offsets.size(); ++i) - { - offsets[i] = offset; - offset += sizes[i + 9]; - } - - const int vertexSize = offset; - const int numVertices = ReadFifo16(data); - - if (mode == DecodeMode::Record && numVertices > 0) - { - for (size_t i = 0; i < offsets.size(); ++i) - { - FifoRecordAnalyzer::WriteVertexArray(static_cast(i), data + offsets[i], vertexSize, - numVertices); - } - } - - data += numVertices * vertexSize; - } - else - { - PanicAlertFmt("FifoPlayer: Unknown Opcode ({:#x}).\n", cmd); - return 0; - } - break; - } - - return (u32)(data - dataStart); -} - -void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem) -{ - switch (subCmd & CP_COMMAND_MASK) - { - case VCD_LO: - cpMem.vtxDesc.low.Hex = value; - break; - - case VCD_HI: - cpMem.vtxDesc.high.Hex = value; - break; - - case CP_VAT_REG_A: - ASSERT(subCmd - CP_VAT_REG_A < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g0.Hex = value; - break; - - case CP_VAT_REG_B: - ASSERT(subCmd - CP_VAT_REG_B < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g1.Hex = value; - break; - - case CP_VAT_REG_C: - ASSERT(subCmd - CP_VAT_REG_C < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g2.Hex = value; - break; - - case ARRAY_BASE: - cpMem.arrayBases[subCmd & CP_ARRAY_MASK] = value; - break; - - case ARRAY_STRIDE: - cpMem.arrayStrides[subCmd & CP_ARRAY_MASK] = value & 0xFF; - break; - } -} -} // namespace FifoAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoAnalyzer.h deleted file mode 100644 index 4e167cbc9f..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "Common/CommonTypes.h" -#include "VideoCommon/CPMemory.h" - -namespace FifoAnalyzer -{ -enum class DecodeMode -{ - 
Record, - Playback, -}; - -u32 AnalyzeCommand(const u8* data, DecodeMode mode); - -struct CPMemory -{ - TVtxDesc vtxDesc; - std::array vtxAttr; - std::array arrayBases{}; - std::array arrayStrides{}; -}; - -void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem); - -extern bool s_DrawingObject; -extern FifoAnalyzer::CPMemory s_CpMem; -} // namespace FifoAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp deleted file mode 100644 index d5ddf4310f..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" - -#include - -#include "Common/Assert.h" -#include "Common/CommonTypes.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoDataFile.h" - -using namespace FifoAnalyzer; - -// For debugging -#define LOG_FIFO_CMDS 0 -struct CmdData -{ - u32 size; - u32 offset; - const u8* ptr; -}; - -void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, - std::vector& frameInfo) -{ - u32* cpMem = file->GetCPMem(); - FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem); - FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem); - - for (u32 i = 0; i < CP_NUM_VAT_REG; ++i) - { - FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem); - FifoAnalyzer::LoadCPReg(CP_VAT_REG_B + i, cpMem[CP_VAT_REG_B + i], s_CpMem); - FifoAnalyzer::LoadCPReg(CP_VAT_REG_C + i, cpMem[CP_VAT_REG_C + i], s_CpMem); - } - - frameInfo.clear(); - frameInfo.resize(file->GetFrameCount()); - - for (u32 frameIdx = 0; frameIdx < file->GetFrameCount(); ++frameIdx) - { - const FifoFrameInfo& frame = file->GetFrame(frameIdx); - AnalyzedFrameInfo& analyzed = frameInfo[frameIdx]; - - s_DrawingObject = false; - - u32 cmdStart = 0; - u32 nextMemUpdate = 0; - -#if LOG_FIFO_CMDS - // Debugging - std::vector 
prevCmds; -#endif - - while (cmdStart < frame.fifoData.size()) - { - // Add memory updates that have occurred before this point in the frame - while (nextMemUpdate < frame.memoryUpdates.size() && - frame.memoryUpdates[nextMemUpdate].fifoPosition <= cmdStart) - { - analyzed.memoryUpdates.push_back(frame.memoryUpdates[nextMemUpdate]); - ++nextMemUpdate; - } - - const bool wasDrawing = s_DrawingObject; - const u32 cmdSize = - FifoAnalyzer::AnalyzeCommand(&frame.fifoData[cmdStart], DecodeMode::Playback); - -#if LOG_FIFO_CMDS - CmdData cmdData; - cmdData.offset = cmdStart; - cmdData.ptr = &frame.fifoData[cmdStart]; - cmdData.size = cmdSize; - prevCmds.push_back(cmdData); -#endif - - // Check for error - if (cmdSize == 0) - { - // Clean up frame analysis - analyzed.objectStarts.clear(); - analyzed.objectCPStates.clear(); - analyzed.objectEnds.clear(); - - return; - } - - if (wasDrawing != s_DrawingObject) - { - if (s_DrawingObject) - { - analyzed.objectStarts.push_back(cmdStart); - analyzed.objectCPStates.push_back(s_CpMem); - } - else - { - analyzed.objectEnds.push_back(cmdStart); - } - } - - cmdStart += cmdSize; - } - - if (analyzed.objectEnds.size() < analyzed.objectStarts.size()) - analyzed.objectEnds.push_back(cmdStart); - - ASSERT(analyzed.objectStarts.size() == analyzed.objectCPStates.size()); - ASSERT(analyzed.objectStarts.size() == analyzed.objectEnds.size()); - } -} diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h deleted file mode 100644 index 78e4c6e7d8..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include - -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoDataFile.h" - -struct AnalyzedFrameInfo -{ - // Start of the primitives for the object (after previous update commands) - std::vector 
objectStarts; - std::vector objectCPStates; - // End of the primitives for the object - std::vector objectEnds; - std::vector memoryUpdates; -}; - -namespace FifoPlaybackAnalyzer -{ -void AnalyzeFrames(FifoDataFile* file, std::vector& frameInfo); -} // namespace FifoPlaybackAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index 47b9ac2f87..58add3faaa 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -4,6 +4,7 @@ #include "Core/FifoPlayer/FifoPlayer.h" #include +#include #include #include "Common/Assert.h" @@ -12,7 +13,6 @@ #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/CoreTiming.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" #include "Core/FifoPlayer/FifoDataFile.h" #include "Core/HW/CPU.h" #include "Core/HW/GPFifo.h" @@ -31,6 +31,136 @@ // TODO: Move texMem somewhere else so this isn't an issue. #include "VideoCommon/TextureDecoder.h" +namespace +{ +class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback +{ +public: + static void AnalyzeFrames(FifoDataFile* file, std::vector& frame_info); + + explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {} + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)); + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {} + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data)); + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {} + OPCODE_CALLBACK(void OnNop(u32 count)); + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {} + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)); + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + bool 
m_start_of_primitives = false; + bool m_end_of_primitives = false; + bool m_efb_copy = false; + // Internal state, copied to above in OnCommand + bool m_was_primitive = false; + bool m_is_primitive = false; + bool m_is_copy = false; + bool m_is_nop = false; + CPState m_cpmem; +}; + +void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, + std::vector& frame_info) +{ + FifoPlaybackAnalyzer analyzer(file->GetCPMem()); + frame_info.clear(); + frame_info.resize(file->GetFrameCount()); + + for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++) + { + const FifoFrameInfo& frame = file->GetFrame(frame_no); + AnalyzedFrameInfo& analyzed = frame_info[frame_no]; + + u32 offset = 0; + + u32 part_start = 0; + CPState cpmem; + + while (offset < frame.fifoData.size()) + { + const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset], + u32(frame.fifoData.size()) - offset, analyzer); + + if (analyzer.m_start_of_primitives) + { + // Start of primitive data for an object + analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem); + part_start = offset; + // Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after + // primitive data, and the first opcode might update cpmem + std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState)); + } + if (analyzer.m_end_of_primitives) + { + // End of primitive data for an object, and thus end of the object + analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem); + part_start = offset; + } + + offset += cmd_size; + + if (analyzer.m_efb_copy) + { + // We increase the offset beforehand, so that the trigger EFB copy command is included. + analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem); + part_start = offset; + } + } + + // The frame should end with an EFB copy, so part_start should have been updated to the end. 
+ ASSERT(part_start == frame.fifoData.size()); + ASSERT(offset == frame.fifoData.size()); + } +} + +void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value) +{ + if (command == BPMEM_TRIGGER_EFB_COPY) + m_is_copy = true; +} + +void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data) +{ + m_is_primitive = true; +} + +void FifoPlaybackAnalyzer::OnNop(u32 count) +{ + m_is_nop = true; +} + +void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size) +{ + m_start_of_primitives = false; + m_end_of_primitives = false; + m_efb_copy = false; + + if (!m_is_nop) + { + if (m_is_primitive && !m_was_primitive) + m_start_of_primitives = true; + else if (m_was_primitive && !m_is_primitive) + m_end_of_primitives = true; + else if (m_is_copy) + m_efb_copy = true; + + m_was_primitive = m_is_primitive; + } + m_is_primitive = false; + m_is_copy = false; + m_is_nop = false; +} +} // namespace + bool IsPlayingBackFifologWithBrokenEFBCopies = false; FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay} @@ -191,7 +321,7 @@ u32 FifoPlayer::GetMaxObjectCount() const u32 result = 0; for (auto& frame : m_FrameInfo) { - const u32 count = static_cast(frame.objectStarts.size()); + const u32 count = frame.part_type_counts[FramePartType::PrimitiveData]; if (count > result) result = count; } @@ -202,7 +332,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const { if (frame < m_FrameInfo.size()) { - return static_cast(m_FrameInfo[frame].objectStarts.size()); + return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData]; } return 0; @@ -262,55 +392,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& m_ElapsedCycles = 0; m_FrameFifoSize = static_cast(frame.fifoData.size()); - // Determine start and end objects - u32 numObjects = (u32)(info.objectStarts.size()); - u32 drawStart = std::min(numObjects, m_ObjectRangeStart); - u32 drawEnd = 
std::min(numObjects - 1, m_ObjectRangeEnd); + u32 memory_update = 0; + u32 object_num = 0; - u32 position = 0; - u32 memoryUpdate = 0; - - // Skip memory updates during frame if true + // Skip all memory updates if early memory updates are enabled, as we already wrote them if (m_EarlyMemoryUpdates) { - memoryUpdate = (u32)(frame.memoryUpdates.size()); + memory_update = (u32)(frame.memoryUpdates.size()); } - if (numObjects > 0) + for (const FramePart& part : info.parts) { - u32 objectNum = 0; + bool show_part; - // Write fifo data skipping objects before the draw range - while (objectNum < drawStart) + if (part.m_type == FramePartType::PrimitiveData) { - WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info); - - position = info.objectEnds[objectNum]; - ++objectNum; + show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd; + object_num++; + } + else + { + // We always include commands and EFB copies, as commands from earlier objects still apply to + // later ones (games generally do not reconfigure everything for each object) + show_part = true; } - // Write objects in draw range - if (objectNum < numObjects && drawStart <= drawEnd) - { - objectNum = drawEnd; - WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info); - position = info.objectEnds[objectNum]; - ++objectNum; - } - - // Write fifo data skipping objects after the draw range - while (objectNum < numObjects) - { - WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info); - - position = info.objectEnds[objectNum]; - ++objectNum; - } + if (show_part) + WriteFramePart(part, &memory_update, frame); } - // Write data after the last object - WriteFramePart(position, static_cast(frame.fifoData.size()), memoryUpdate, frame, info); - FlushWGP(); // Sleep while the GPU is active @@ -321,36 +431,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& } } -void 
FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, - const FifoFrameInfo& frame, const AnalyzedFrameInfo& info) +void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update, + const FifoFrameInfo& frame) { const u8* const data = frame.fifoData.data(); - while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd) - { - const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate]; + u32 data_start = part.m_start; + const u32 data_end = part.m_end; - if (memUpdate.fifoPosition < dataEnd) + while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end) + { + const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update]; + + if (memUpdate.fifoPosition < data_end) { - if (dataStart < memUpdate.fifoPosition) + if (data_start < memUpdate.fifoPosition) { - WriteFifo(data, dataStart, memUpdate.fifoPosition); - dataStart = memUpdate.fifoPosition; + WriteFifo(data, data_start, memUpdate.fifoPosition); + data_start = memUpdate.fifoPosition; } WriteMemory(memUpdate); - ++nextMemUpdate; + ++*next_mem_update; } else { - WriteFifo(data, dataStart, dataEnd); - dataStart = dataEnd; + WriteFifo(data, data_start, data_end); + data_start = data_end; } } - if (dataStart < dataEnd) - WriteFifo(data, dataStart, dataEnd); + if (data_start < data_end) + WriteFifo(data, data_start, data_end); } void FifoPlayer::WriteAllMemoryUpdates() diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.h b/Source/Core/Core/FifoPlayer/FifoPlayer.h index 01ce07c4a0..4e2e0ffed7 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.h @@ -5,16 +5,18 @@ #include #include +#include #include #include +#include "Common/Assert.h" #include "Core/FifoPlayer/FifoDataFile.h" -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" #include "Core/PowerPC/CPUCoreBase.h" +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/OpcodeDecoding.h" class FifoDataFile; struct MemoryUpdate; -struct 
AnalyzedFrameInfo; namespace CPU { @@ -43,16 +45,46 @@ enum class State; // 8. The output of fifoplayer would be wrong. // To keep compatibility with old fifologs, we have this flag which signals texture cache to not -// bother -// hashing the memory and just assume the hash matched. +// bother hashing the memory and just assume the hash matched. // At a later point proper efb copy support should be added to fiforecorder and this flag will -// change -// based on the version of the .dff file, but until then it will always be true when a fifolog is -// playing. +// change based on the version of the .dff file, but until then it will always be true when a +// fifolog is playing. // Shitty global to fix a shitty problem extern bool IsPlayingBackFifologWithBrokenEFBCopies; +enum class FramePartType +{ + Commands, + PrimitiveData, + EFBCopy, +}; + +struct FramePart +{ + constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem) + : m_type(type), m_start(start), m_end(end), m_cpmem(cpmem) + { + } + + const FramePartType m_type; + const u32 m_start; + const u32 m_end; + const CPState m_cpmem; +}; + +struct AnalyzedFrameInfo +{ + std::vector parts; + Common::EnumMap part_type_counts; + + void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem) + { + parts.emplace_back(type, start, end, cpmem); + part_type_counts[type]++; + } +}; + class FifoPlayer { public: @@ -102,14 +134,12 @@ public: private: class CPUCore; - FifoPlayer(); CPU::State AdvanceFrame(); void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info); - void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame, - const AnalyzedFrameInfo& info); + void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame); void WriteAllMemoryUpdates(); void WriteMemory(const MemoryUpdate& memUpdate); diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp 
b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp deleted file mode 100644 index 1f9adcc54c..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" - -#include - -#include "Common/MsgHandler.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoRecorder.h" -#include "Core/HW/Memmap.h" - -using namespace FifoAnalyzer; - -void FifoRecordAnalyzer::Initialize(const u32* cpMem) -{ - s_DrawingObject = false; - - FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem); - FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem); - for (u32 i = 0; i < CP_NUM_VAT_REG; ++i) - FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem); - - const u32* const bases_start = cpMem + ARRAY_BASE; - const u32* const bases_end = bases_start + s_CpMem.arrayBases.size(); - std::copy(bases_start, bases_end, s_CpMem.arrayBases.begin()); - - const u32* const strides_start = cpMem + ARRAY_STRIDE; - const u32* const strides_end = strides_start + s_CpMem.arrayStrides.size(); - std::copy(strides_start, strides_end, s_CpMem.arrayStrides.begin()); -} - -void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array) -{ - int index = val >> 16; - int size = ((val >> 12) & 0xF) + 1; - - u32 address = s_CpMem.arrayBases[array] + s_CpMem.arrayStrides[array] * index; - - FifoRecorder::GetInstance().UseMemory(address, size * 4, MemoryUpdate::XF_DATA); -} - -void FifoRecordAnalyzer::WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, - int numVertices) -{ - // Skip if not indexed array - VertexComponentFormat arrayType; - if (arrayIndex == ARRAY_POSITION) - arrayType = s_CpMem.vtxDesc.low.Position; - else if (arrayIndex == ARRAY_NORMAL) - arrayType = s_CpMem.vtxDesc.low.Normal; - else if (arrayIndex >= ARRAY_COLOR0 && arrayIndex < ARRAY_COLOR0 + 
NUM_COLOR_ARRAYS) - arrayType = s_CpMem.vtxDesc.low.Color[arrayIndex - ARRAY_COLOR0]; - else if (arrayIndex >= ARRAY_TEXCOORD0 && arrayIndex < ARRAY_TEXCOORD0 + NUM_TEXCOORD_ARRAYS) - arrayType = s_CpMem.vtxDesc.high.TexCoord[arrayIndex - ARRAY_TEXCOORD0]; - else - { - PanicAlertFmt("Invalid arrayIndex {}", arrayIndex); - return; - } - - if (!IsIndexed(arrayType)) - return; - - int maxIndex = 0; - - // Determine min and max indices - if (arrayType == VertexComponentFormat::Index8) - { - for (int i = 0; i < numVertices; ++i) - { - int index = *vertexData; - vertexData += vertexSize; - - // 0xff skips the vertex - if (index != 0xff) - { - if (index > maxIndex) - maxIndex = index; - } - } - } - else - { - for (int i = 0; i < numVertices; ++i) - { - int index = Common::swap16(vertexData); - vertexData += vertexSize; - - // 0xffff skips the vertex - if (index != 0xffff) - { - if (index > maxIndex) - maxIndex = index; - } - } - } - - u32 arrayStart = s_CpMem.arrayBases[arrayIndex]; - u32 arraySize = s_CpMem.arrayStrides[arrayIndex] * (maxIndex + 1); - - FifoRecorder::GetInstance().UseMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM); -} diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h deleted file mode 100644 index 8c3bd00a86..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "Common/CommonTypes.h" - -namespace FifoRecordAnalyzer -{ -// Must call this before analyzing Fifo commands with FifoAnalyzer::AnalyzeCommand() -void Initialize(const u32* cpMem); - -void ProcessLoadIndexedXf(u32 val, int array); -void WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, int numVertices); -} // namespace FifoRecordAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp index 
8c4bf184bc..a47877ef4f 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp @@ -6,13 +6,168 @@ #include #include +#include "Common/Logging/Log.h" #include "Common/MsgHandler.h" #include "Common/Thread.h" + #include "Core/ConfigManager.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" #include "Core/HW/Memmap.h" +#include "VideoCommon/OpcodeDecoding.h" +#include "VideoCommon/XFStructs.h" + +class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback +{ +public: + explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {} + explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem) + : m_owner(owner), m_cpmem(cpmem) + { + } + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {} + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {} + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)); + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data)); + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + WARN_LOG_FMT(VIDEO, + "Unhandled display list call {:08x} {:08x}; should have been inlined earlier", + address, size); + } + OPCODE_CALLBACK(void OnNop(u32 count)) {} + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {} + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + +private: + void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type, + u32 component_offset, u32 vertex_size, u16 num_vertices, + const u8* vertex_data); + + FifoRecorder* const m_owner; + CPState m_cpmem; +}; + +void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) +{ + 
const u32 load_address = m_cpmem.array_bases[array] + m_cpmem.array_strides[array] * index; + + m_owner->UseMemory(load_address, size * sizeof(u32), MemoryUpdate::XF_DATA); +} + +// TODO: The following code is copied with modifications from VertexLoaderBase. +// Surely there's a better solution? +#include "VideoCommon/VertexLoader_Color.h" +#include "VideoCommon/VertexLoader_Normal.h" +#include "VideoCommon/VertexLoader_Position.h" +#include "VideoCommon/VertexLoader_TextCoord.h" + +void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, + u8 vat, u32 vertex_size, u16 num_vertices, + const u8* vertex_data) +{ + const auto& vtx_desc = m_cpmem.vtx_desc; + const auto& vtx_attr = m_cpmem.vtx_attr[vat]; + + u32 offset = 0; + + if (vtx_desc.low.PosMatIdx) + offset++; + for (auto texmtxidx : vtx_desc.low.TexMatIdx) + { + if (texmtxidx) + offset++; + } + const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat, + vtx_attr.g0.PosElements); + ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size, + num_vertices, vertex_data); + offset += pos_size; + + const u32 norm_size = + VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat, + vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3); + ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Normal, offset, vertex_size, num_vertices, + vertex_data); + offset += norm_size; + + for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++) + { + const u32 color_size = + VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i)); + ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Color[i], offset, vertex_size, + num_vertices, vertex_data); + offset += color_size; + } + for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++) + { + const u32 tc_size = VertexLoader_TextCoord::GetSize( + vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i)); + 
ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.high.TexCoord[i], offset, vertex_size, + num_vertices, vertex_data); + offset += tc_size; + } + + ASSERT(offset == vertex_size); +} + +// If a component is indexed, the array it indexes into for data must be saved. +void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index, + VertexComponentFormat array_type, + u32 component_offset, u32 vertex_size, + u16 num_vertices, + const u8* vertex_data) +{ + // Skip if not indexed array + if (!IsIndexed(array_type)) + return; + + u16 max_index = 0; + + // Determine the maximum index + if (array_type == VertexComponentFormat::Index8) + { + for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + const u8 index = vertex_data[component_offset]; + vertex_data += vertex_size; + + // 0xff skips the vertex + if (index != 0xff) + { + if (index > max_index) + max_index = index; + } + } + } + else + { + for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + const u16 index = Common::swap16(&vertex_data[component_offset]); + vertex_data += vertex_size; + + // 0xffff skips the vertex + if (index != 0xffff) + { + if (index > max_index) + max_index = index; + } + } + } + + const u32 array_start = m_cpmem.array_bases[array_index]; + const u32 array_size = m_cpmem.array_strides[array_index] * (max_index + 1); + + m_owner->UseMemory(array_start, array_size, MemoryUpdate::VERTEX_STREAM); +} + static FifoRecorder instance; FifoRecorder::FifoRecorder() = default; @@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size) { // Assumes data contains all information for the command // Calls FifoRecorder::UseMemory - const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record); + const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer); // Make sure FifoPlayer's command analyzer agrees about the size of the command. 
if (analyzed_size != size) @@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32* memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE); } - FifoRecordAnalyzer::Initialize(cpMem); + m_record_analyzer = std::make_unique(this, cpMem); } bool FifoRecorder::IsRecording() const diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.h b/Source/Core/Core/FifoPlayer/FifoRecorder.h index cbef424561..3a28d05bce 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.h +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.h @@ -8,6 +8,7 @@ #include #include +#include "Common/Assert.h" #include "Core/FifoPlayer/FifoDataFile.h" class FifoRecorder @@ -47,6 +48,8 @@ public: static FifoRecorder& GetInstance(); private: + class FifoRecordAnalyzer; + // Accessed from both GUI and video threads std::recursive_mutex m_mutex; @@ -65,6 +68,7 @@ private: bool m_SkipFutureData = true; bool m_FrameEnded = false; FifoFrameInfo m_CurrentFrame; + std::unique_ptr m_record_analyzer; std::vector m_FifoData; std::vector m_Ram; std::vector m_ExRam; diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 5897714436..49d2cfe994 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -217,11 +217,8 @@ - - - @@ -815,11 +812,8 @@ - - - diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 4fcc0b2fb8..be9daf57d3 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -3,6 +3,8 @@ #include "DolphinQt/FIFO/FIFOAnalyzer.h" +#include + #include #include #include @@ -27,8 +29,12 @@ #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/XFStructs.h" +// Values range from 0 to number of frames - 1 constexpr int FRAME_ROLE = Qt::UserRole; -constexpr int OBJECT_ROLE = Qt::UserRole + 1; +// Values range from 0 to number of parts - 1 +constexpr int PART_START_ROLE = Qt::UserRole + 1; +// Values range from 1 
to number of parts +constexpr int PART_END_ROLE = Qt::UserRole + 2; FIFOAnalyzer::FIFOAnalyzer() { @@ -144,43 +150,175 @@ void FIFOAnalyzer::UpdateTree() auto* file = FifoPlayer::GetInstance().GetFile(); const u32 frame_count = file->GetFrameCount(); + for (u32 frame = 0; frame < frame_count; frame++) { auto* frame_item = new QTreeWidgetItem({tr("Frame %1").arg(frame)}); recording_item->addChild(frame_item); - const u32 object_count = FifoPlayer::GetInstance().GetFrameObjectCount(frame); - for (u32 object = 0; object < object_count; object++) + const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame); + ASSERT(frame_info.parts.size() != 0); + + Common::EnumMap part_counts; + u32 part_start = 0; + + for (u32 part_nr = 0; part_nr < frame_info.parts.size(); part_nr++) { - auto* object_item = new QTreeWidgetItem({tr("Object %1").arg(object)}); + const auto& part = frame_info.parts[part_nr]; - frame_item->addChild(object_item); + const u32 part_type_nr = part_counts[part.m_type]; + part_counts[part.m_type]++; - object_item->setData(0, FRAME_ROLE, frame); - object_item->setData(0, OBJECT_ROLE, object); + QTreeWidgetItem* object_item = nullptr; + if (part.m_type == FramePartType::PrimitiveData) + object_item = new QTreeWidgetItem({tr("Object %1").arg(part_type_nr)}); + else if (part.m_type == FramePartType::EFBCopy) + object_item = new QTreeWidgetItem({tr("EFB copy %1").arg(part_type_nr)}); + // We don't create dedicated labels for FramePartType::Command; + // those are grouped with the primitive + + if (object_item != nullptr) + { + frame_item->addChild(object_item); + + object_item->setData(0, FRAME_ROLE, frame); + object_item->setData(0, PART_START_ROLE, part_start); + object_item->setData(0, PART_END_ROLE, part_nr); + + part_start = part_nr + 1; + } } + + // We shouldn't end on a Command (it should end with an EFB copy) + ASSERT(part_start == frame_info.parts.size()); + // The counts we computed should match the frame's counts + 
ASSERT(std::equal(frame_info.part_type_counts.begin(), frame_info.part_type_counts.end(), + part_counts.begin())); } } -static std::string GetPrimitiveName(u8 cmd) +namespace { - if ((cmd & 0xC0) != 0x80) +class DetailCallback : public OpcodeDecoder::Callback +{ +public: + explicit DetailCallback(CPState cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { - PanicAlertFmt("Not a primitive command: {:#04x}", cmd); - return ""; + // Note: No need to update m_cpmem as it already has the final value for this object + + const auto [name, desc] = GetCPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QStringLiteral("CP %1 %2 %3") + .arg(command, 2, 16, QLatin1Char('0')) + .arg(value, 8, 16, QLatin1Char('0')) + .arg(QString::fromStdString(name)); } - const u8 vat = cmd & OpcodeDecoder::GX_VAT_MASK; // Vertex loader index (0 - 7) - const u8 primitive = - (cmd & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT; - static constexpr std::array names = { - "GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)", - "GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP", - "GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES", - "GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS", - }; - return fmt::format("{} VAT {}", names[primitive], vat); -} + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) + { + const auto [name, desc] = GetXFTransferInfo(address, count, data); + ASSERT(!name.empty()); + + const u32 command = address | (count << 16); + + text = QStringLiteral("XF %1 ").arg(command, 8, 16, QLatin1Char('0')); + + for (u8 i = 0; i < count; i++) + { + const u32 value = Common::swap32(&data[i * 4]); + + text += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0')); + } + + text += QStringLiteral(" ") + QString::fromStdString(name); + } + + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + const auto [name, desc] = GetBPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QStringLiteral("BP %1 %2 %3") + .arg(command, 2, 16, 
QLatin1Char('0')) + .arg(value, 6, 16, QLatin1Char('0')) + .arg(QString::fromStdString(name)); + } + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size); + text = QStringLiteral("LOAD INDX %1 %2") + .arg(QString::fromStdString(fmt::to_string(array))) + .arg(QString::fromStdString(desc)); + } + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + const auto name = fmt::to_string(primitive); + + // Note that vertex_count is allowed to be 0, with no special treatment + // (another command just comes right after the current command, with no vertices in between) + const u32 object_prim_size = num_vertices * vertex_size; + + const u8 opcode = + 0x80 | (static_cast(primitive) << OpcodeDecoder::GX_PRIMITIVE_SHIFT) | vat; + text = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes") + .arg(QString::fromStdString(name)) + .arg(opcode, 2, 16, QLatin1Char('0')) + .arg(num_vertices) + .arg(vertex_size) + .arg(object_prim_size); + + // It's not really useful to have a massive unreadable hex string for the object primitives. + // Put it in the description instead. 
+ +// #define INCLUDE_HEX_IN_PRIMITIVES +#ifdef INCLUDE_HEX_IN_PRIMITIVES + text += QStringLiteral(" "); + for (u32 i = 0; i < object_prim_size; i++) + { + text += QStringLiteral("%1").arg(vertex_data[i], 2, 16, QLatin1Char('0')); + } +#endif + } + + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + text = QObject::tr("Call display list at %1 with size %2") + .arg(address, 8, 16, QLatin1Char('0')) + .arg(size, 8, 16, QLatin1Char('0')); + } + + OPCODE_CALLBACK(void OnNop(u32 count)) + { + if (count > 1) + text = QStringLiteral("NOP (%1x)").arg(count); + else + text = QStringLiteral("NOP"); + } + + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + using OpcodeDecoder::Opcode; + if (static_cast(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS) + text = QStringLiteral("GX_CMD_UNKNOWN_METRICS"); + else if (static_cast(opcode) == Opcode::GX_CMD_INVL_VC) + text = QStringLiteral("GX_CMD_INVL_VC"); + else + text = QStringLiteral("Unknown opcode %1").arg(opcode, 2, 16); + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + QString text; + CPState m_cpmem; +}; +} // namespace void FIFOAnalyzer::UpdateDetails() { @@ -200,205 +338,40 @@ void FIFOAnalyzer::UpdateDetails() const auto items = m_tree_widget->selectedItems(); - if (items.isEmpty() || items[0]->data(0, OBJECT_ROLE).isNull()) + if (items.isEmpty() || items[0]->data(0, PART_START_ROLE).isNull()) return; const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt(); - const auto& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); + const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const auto& fifo_frame = 
FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - // Note that frame_info.objectStarts[object_nr] is the start of the primitive data, - // but we want to start with the register updates which happen before that. - const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); - const u32 object_size = frame_info.objectEnds[object_nr] - object_start; - - const u8* const object = &fifo_frame.fifoData[object_start]; + const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; u32 object_offset = 0; + // NOTE: object_info.m_cpmem is the state of cpmem _after_ all of the commands in this object. + // However, it doesn't matter that it doesn't match the start, since it will match by the time + // primitives are reached. + auto callback = DetailCallback(frame_info.parts[end_part_nr].m_cpmem); + while (object_offset < object_size) { - QString new_label; const u32 start_offset = object_offset; m_object_data_offsets.push_back(start_offset); - const u8 command = object[object_offset++]; - switch (command) - { - case OpcodeDecoder::GX_NOP: - if (object[object_offset] == OpcodeDecoder::GX_NOP) - { - u32 nop_count = 2; - while (object[++object_offset] == OpcodeDecoder::GX_NOP) - nop_count++; + object_offset += OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + start_offset], + object_size - start_offset, callback); - new_label = QStringLiteral("NOP (%1x)").arg(nop_count); - } - else - { - new_label = QStringLiteral("NOP"); - } - break; - - case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS: - new_label = QStringLiteral("GX_CMD_UNKNOWN_METRICS"); - break; - - case OpcodeDecoder::GX_CMD_INVL_VC: - new_label = QStringLiteral("GX_CMD_INVL_VC"); - break; - - case OpcodeDecoder::GX_LOAD_CP_REG: - { - const u8 cmd2 = object[object_offset++]; - const u32 value = Common::swap32(&object[object_offset]); - object_offset += 4; - - const 
auto [name, desc] = GetCPRegInfo(cmd2, value); - ASSERT(!name.empty()); - - new_label = QStringLiteral("CP %1 %2 %3") - .arg(cmd2, 2, 16, QLatin1Char('0')) - .arg(value, 8, 16, QLatin1Char('0')) - .arg(QString::fromStdString(name)); - } - break; - - case OpcodeDecoder::GX_LOAD_XF_REG: - { - const auto [name, desc] = GetXFTransferInfo(&object[object_offset]); - const u32 cmd2 = Common::swap32(&object[object_offset]); - object_offset += 4; - ASSERT(!name.empty()); - - const u8 stream_size = ((cmd2 >> 16) & 15) + 1; - - new_label = QStringLiteral("XF %1 ").arg(cmd2, 8, 16, QLatin1Char('0')); - - for (u8 i = 0; i < stream_size; i++) - { - const u32 value = Common::swap32(&object[object_offset]); - object_offset += 4; - - new_label += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0')); - } - - new_label += QStringLiteral(" ") + QString::fromStdString(name); - } - break; - - case OpcodeDecoder::GX_LOAD_INDX_A: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc)); - } - break; - case OpcodeDecoder::GX_LOAD_INDX_B: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc)); - } - break; - case OpcodeDecoder::GX_LOAD_INDX_C: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc)); - } - break; - case OpcodeDecoder::GX_LOAD_INDX_D: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX D %1").arg(QString::fromStdString(desc)); - } - break; - - case OpcodeDecoder::GX_CMD_CALL_DL: - // The 
recorder should have expanded display lists into the fifo stream and skipped the - // call to start them - // That is done to make it easier to track where memory is updated - ASSERT(false); - object_offset += 8; - new_label = QStringLiteral("CALL DL"); - break; - - case OpcodeDecoder::GX_LOAD_BP_REG: - { - const u8 cmd2 = object[object_offset++]; - const u32 cmddata = Common::swap24(&object[object_offset]); - object_offset += 3; - - const auto [name, desc] = GetBPRegInfo(cmd2, cmddata); - ASSERT(!name.empty()); - - new_label = QStringLiteral("BP %1 %2 %3") - .arg(cmd2, 2, 16, QLatin1Char('0')) - .arg(cmddata, 6, 16, QLatin1Char('0')) - .arg(QString::fromStdString(name)); - } - break; - - default: - if ((command & 0xC0) == 0x80) - { - // Object primitive data - - const u8 vat = command & OpcodeDecoder::GX_VAT_MASK; - const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc; - const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat]; - - const auto name = GetPrimitiveName(command); - - const u16 vertex_count = Common::swap16(&object[object_offset]); - object_offset += 2; - const u32 vertex_size = VertexLoaderBase::GetVertexSize(vtx_desc, vtx_attr); - - // Note that vertex_count is allowed to be 0, with no special treatment - // (another command just comes right after the current command, with no vertices in between) - const u32 object_prim_size = vertex_count * vertex_size; - - new_label = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes") - .arg(QString::fromStdString(name)) - .arg(command, 2, 16, QLatin1Char('0')) - .arg(vertex_count) - .arg(vertex_size) - .arg(object_prim_size); - - // It's not really useful to have a massive unreadable hex string for the object primitives. - // Put it in the description instead. 
- -// #define INCLUDE_HEX_IN_PRIMITIVES -#ifdef INCLUDE_HEX_IN_PRIMITIVES - new_label += QStringLiteral(" "); - for (u32 i = 0; i < object_prim_size; i++) - { - new_label += QStringLiteral("%1").arg(object[object_offset++], 2, 16, QLatin1Char('0')); - } -#else - object_offset += object_prim_size; -#endif - } - else - { - new_label = QStringLiteral("Unknown opcode %1").arg(command, 2, 16); - } - break; - } - new_label = QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) + - new_label; + QString new_label = + QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) + + callback.text; m_detail_list->addItem(new_label); } - ASSERT(object_offset == object_size); - // Needed to ensure the description updates when changing objects m_detail_list->setCurrentRow(0); } @@ -413,12 +386,15 @@ void FIFOAnalyzer::BeginSearch() const auto items = m_tree_widget->selectedItems(); if (items.isEmpty() || items[0]->data(0, FRAME_ROLE).isNull() || - items[0]->data(0, OBJECT_ROLE).isNull()) + items[0]->data(0, PART_START_ROLE).isNull()) { m_search_label->setText(tr("Invalid search parameters (no object selected)")); return; } + // Having PART_START_ROLE indicates that this is valid + const int object_idx = items[0]->parent()->indexOfChild(items[0]); + // TODO: Remove even string length limit if (search_str.length() % 2) { @@ -449,13 +425,15 @@ void FIFOAnalyzer::BeginSearch() m_search_results.clear(); const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt(); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - const u32 object_start = (object_nr == 0 ? 
0 : frame_info.objectEnds[object_nr - 1]); - const u32 object_size = frame_info.objectEnds[object_nr] - object_start; + const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; const u8* const object = &fifo_frame.fifoData[object_start]; @@ -474,7 +452,7 @@ void FIFOAnalyzer::BeginSearch() { if (std::equal(search_val.begin(), search_val.end(), ptr)) { - m_search_results.emplace_back(frame_nr, object_nr, cmd_nr); + m_search_results.emplace_back(frame_nr, object_idx, cmd_nr); break; } } @@ -528,7 +506,7 @@ void FIFOAnalyzer::ShowSearchResult(size_t index) const auto& result = m_search_results[index]; QTreeWidgetItem* object_item = - m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object); + m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object_idx); m_tree_widget->setCurrentItem(object_item); m_detail_list->setCurrentRow(result.m_cmd); @@ -537,6 +515,225 @@ void FIFOAnalyzer::ShowSearchResult(size_t index) m_search_previous->setEnabled(index > 0); } +namespace +{ +// TODO: Not sure whether we should bother translating the descriptions +class DescriptionCallback : public OpcodeDecoder::Callback +{ +public: + explicit DescriptionCallback(const CPState& cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + const auto [name, desc] = GetBPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QObject::tr("BP register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) + { + // Note: No need to update m_cpmem as it already has the final value for this object + + const auto [name, desc] = GetCPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QObject::tr("CP 
register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) + { + const auto [name, desc] = GetXFTransferInfo(address, count, data); + ASSERT(!name.empty()); + + text = QObject::tr("XF register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size); + + text = QString::fromStdString(desc); + text += QLatin1Char{'\n'}; + switch (array) + { + case CPArray::XF_A: + text += QObject::tr("Usually used for position matrices"); + break; + case CPArray::XF_B: + // i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal" + // does not have its usual meaning here, but rather the meaning of "perpendicular to a + // surface". + text += QObject::tr("Usually used for normal matrices"); + break; + case CPArray::XF_C: + // i18n: Tex coord is short for texture coordinate + text += QObject::tr("Usually used for tex coord matrices"); + break; + case CPArray::XF_D: + text += QObject::tr("Usually used for light objects"); + break; + default: + break; + } + text += QLatin1Char{'\n'}; + text += QString::fromStdString(written); + } + + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + const auto name = fmt::format("{} VAT {}", primitive, vat); + + // i18n: In this context, a primitive means a point, line, triangle or rectangle. + // Do not translate the word primitive as if it was an adjective. 
+ text = QObject::tr("Primitive %1").arg(QString::fromStdString(name)); + text += QLatin1Char{'\n'}; + + const auto& vtx_desc = m_cpmem.vtx_desc; + const auto& vtx_attr = m_cpmem.vtx_attr[vat]; + + u32 i = 0; + const auto process_component = [&](VertexComponentFormat cformat, ComponentFormat format, + u32 non_indexed_count, u32 indexed_count = 1) { + u32 count; + if (cformat == VertexComponentFormat::NotPresent) + return; + else if (cformat == VertexComponentFormat::Index8) + { + format = ComponentFormat::UByte; + count = indexed_count; + } + else if (cformat == VertexComponentFormat::Index16) + { + format = ComponentFormat::UShort; + count = indexed_count; + } + else + { + count = non_indexed_count; + } + + const u32 component_size = GetElementSize(format); + for (u32 j = 0; j < count; j++) + { + for (u32 component_off = 0; component_off < component_size; component_off++) + { + text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0')); + } + if (format == ComponentFormat::Float) + { + const float value = Common::BitCast(Common::swap32(&vertex_data[i])); + text += QStringLiteral(" (%1)").arg(value); + } + i += component_size; + text += QLatin1Char{' '}; + } + text += QLatin1Char{' '}; + }; + const auto process_simple_component = [&](u32 size) { + for (u32 component_off = 0; component_off < size; component_off++) + { + text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0')); + } + i += size; + text += QLatin1Char{' '}; + text += QLatin1Char{' '}; + }; + + for (u32 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + ASSERT(i == vertex_num * vertex_size); + + text += QLatin1Char{'\n'}; + if (vtx_desc.low.PosMatIdx) + process_simple_component(1); + for (auto texmtxidx : vtx_desc.low.TexMatIdx) + { + if (texmtxidx) + process_simple_component(1); + } + process_component(vtx_desc.low.Position, vtx_attr.g0.PosFormat, + vtx_attr.g0.PosElements == CoordComponentCount::XY ? 
2 : 3); + // TODO: Is this calculation correct? + const u32 normal_component_count = + vtx_desc.low.Normal == VertexComponentFormat::Direct ? 3 : 1; + const u32 normal_elements = vtx_attr.g0.NormalElements == NormalComponentCount::NBT ? 3 : 1; + process_component(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat, + normal_component_count * normal_elements, + vtx_attr.g0.NormalIndex3 ? normal_elements : 1); + for (u32 c = 0; c < vtx_desc.low.Color.Size(); c++) + { + static constexpr Common::EnumMap component_sizes = { + 2, // RGB565 + 3, // RGB888 + 4, // RGB888x + 2, // RGBA4444 + 3, // RGBA6666 + 4, // RGBA8888 + }; + switch (vtx_desc.low.Color[c]) + { + case VertexComponentFormat::Index8: + process_simple_component(1); + break; + case VertexComponentFormat::Index16: + process_simple_component(2); + break; + case VertexComponentFormat::Direct: + process_simple_component(component_sizes[vtx_attr.GetColorFormat(c)]); + break; + } + } + for (u32 t = 0; t < vtx_desc.high.TexCoord.Size(); t++) + { + process_component(vtx_desc.high.TexCoord[t], vtx_attr.GetTexFormat(t), + vtx_attr.GetTexElements(t) == TexComponentCount::ST ? 
2 : 1); + } + } + } + + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + text = QObject::tr("No description available"); + } + + OPCODE_CALLBACK(void OnNop(u32 count)) { text = QObject::tr("No description available"); } + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + text = QObject::tr("No description available"); + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + QString text; + CPState m_cpmem; +}; +} // namespace + void FIFOAnalyzer::UpdateDescription() { m_entry_detail_browser->clear(); @@ -549,148 +746,24 @@ void FIFOAnalyzer::UpdateDescription() if (items.isEmpty() || m_object_data_offsets.empty()) return; - if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, OBJECT_ROLE).isNull()) + if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, PART_START_ROLE).isNull()) return; const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt(); const u32 entry_nr = m_detail_list->currentRow(); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - const u32 object_start = (object_nr == 0 ? 
0 : frame_info.objectEnds[object_nr - 1]); + const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; const u32 entry_start = m_object_data_offsets[entry_nr]; - const u8* cmddata = &fifo_frame.fifoData[object_start + entry_start]; - - // TODO: Not sure whether we should bother translating the descriptions - - QString text; - if (*cmddata == OpcodeDecoder::GX_LOAD_BP_REG) - { - const u8 cmd = *(cmddata + 1); - const u32 value = Common::swap24(cmddata + 2); - - const auto [name, desc] = GetBPRegInfo(cmd, value); - ASSERT(!name.empty()); - - text = tr("BP register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_CP_REG) - { - const u8 cmd = *(cmddata + 1); - const u32 value = Common::swap32(cmddata + 2); - - const auto [name, desc] = GetCPRegInfo(cmd, value); - ASSERT(!name.empty()); - - text = tr("CP register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_XF_REG) - { - const auto [name, desc] = GetXFTransferInfo(cmddata + 1); - ASSERT(!name.empty()); - - text = tr("XF register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_A) - { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - text += tr("Usually used for position matrices"); - text += QLatin1Char{'\n'}; - text += 
QString::fromStdString(written); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_B) - { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - // i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal" - // does not have its usual meaning here, but rather the meaning of "perpendicular to a surface". - text += tr("Usually used for normal matrices"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_C) - { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - // i18n: Tex coord is short for texture coordinate - text += tr("Usually used for tex coord matrices"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_D) - { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - text += tr("Usually used for light objects"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if ((*cmddata & 0xC0) == 0x80) - { - const u8 vat = *cmddata & OpcodeDecoder::GX_VAT_MASK; - const QString name = QString::fromStdString(GetPrimitiveName(*cmddata)); - const u16 vertex_count = Common::swap16(cmddata + 1); - - // i18n: In this context, a primitive means a point, line, triangle or rectangle. - // Do not translate the word primitive as if it was an adjective. 
- text = tr("Primitive %1").arg(name); - text += QLatin1Char{'\n'}; - - const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc; - const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat]; - const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr); - - u32 i = 3; - for (u32 vertex_num = 0; vertex_num < vertex_count; vertex_num++) - { - text += QLatin1Char{'\n'}; - for (u32 comp_size : component_sizes) - { - for (u32 comp_off = 0; comp_off < comp_size; comp_off++) - { - text += QStringLiteral("%1").arg(cmddata[i++], 2, 16, QLatin1Char('0')); - } - text += QLatin1Char{' '}; - } - } - } - else - { - text = tr("No description available"); - } - - m_entry_detail_browser->setText(text); + auto callback = DescriptionCallback(frame_info.parts[end_part_nr].m_cpmem); + OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + entry_start], + object_size - entry_start, callback); + m_entry_detail_browser->setText(callback.text); } diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h index 6a1c0a948a..222ce8e06b 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h @@ -58,15 +58,19 @@ private: struct SearchResult { - constexpr SearchResult(u32 frame, u32 object, u32 cmd) - : m_frame(frame), m_object(object), m_cmd(cmd) + constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd) + : m_frame(frame), m_object_idx(object_idx), m_cmd(cmd) { } const u32 m_frame; - const u32 m_object; + // Index in tree view. Does not correspond with object numbers or part numbers. + const u32 m_object_idx; const u32 m_cmd; }; + // Offsets from the start of the first part in an object for each command within the currently + // selected object. 
std::vector m_object_data_offsets; + std::vector m_search_results; }; diff --git a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp index cdcad8eeb2..c9f9a6ceff 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp @@ -21,7 +21,6 @@ #include "Core/Core.h" #include "Core/FifoPlayer/FifoDataFile.h" -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" #include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoRecorder.h" @@ -151,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets() layout->addWidget(recording_group); layout->addWidget(m_button_box); - QWidget* main_widget = new QWidget(this); - main_widget->setLayout(layout); + m_main_widget = new QWidget(this); + m_main_widget->setLayout(layout); - auto* tab_widget = new QTabWidget(this); + m_tab_widget = new QTabWidget(this); m_analyzer = new FIFOAnalyzer; - tab_widget->addTab(main_widget, tr("Play / Record")); - tab_widget->addTab(m_analyzer, tr("Analyze")); + m_tab_widget->addTab(m_main_widget, tr("Play / Record")); + m_tab_widget->addTab(m_analyzer, tr("Analyze")); auto* tab_layout = new QVBoxLayout; - tab_layout->addWidget(tab_widget); + tab_layout->addWidget(m_tab_widget); setLayout(tab_layout); } @@ -251,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped() StopRecording(); UpdateControls(); + // When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful + m_tab_widget->setCurrentWidget(m_main_widget); m_analyzer->Update(); } diff --git a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h index 7f6fbf6f1a..2fe7bce352 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h +++ b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h @@ -12,6 +12,7 @@ class QDialogButtonBox; class QLabel; class QPushButton; class QSpinBox; +class QTabWidget; class FIFOAnalyzer; class FIFOPlayerWindow : public QWidget @@ -64,6 +65,9 @@ 
private: QCheckBox* m_early_memory_updates; QDialogButtonBox* m_button_box; + QWidget* m_main_widget; + QTabWidget* m_tab_widget; + FIFOAnalyzer* m_analyzer; Core::State m_emu_state = Core::State::Uninitialized; }; diff --git a/Source/Core/DolphinTool/CMakeLists.txt b/Source/Core/DolphinTool/CMakeLists.txt index 19eb651273..4b209ac98b 100644 --- a/Source/Core/DolphinTool/CMakeLists.txt +++ b/Source/Core/DolphinTool/CMakeLists.txt @@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool) target_link_libraries(dolphin-tool PRIVATE - core discio videocommon cpp-optparse diff --git a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp index 80a59b57b3..8f7d9cad12 100644 --- a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp @@ -3,6 +3,8 @@ #include +#include "Common/EnumMap.h" + #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DRender.h" #include "VideoBackends/D3D/D3DState.h" @@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) return std::make_unique(vtx_decl); } -static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = { - // float formats - DXGI_FORMAT_R8_UNORM, - DXGI_FORMAT_R8_SNORM, - DXGI_FORMAT_R16_UNORM, - DXGI_FORMAT_R16_SNORM, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R8G8_UNORM, - DXGI_FORMAT_R8G8_SNORM, - DXGI_FORMAT_R16G16_UNORM, - DXGI_FORMAT_R16G16_SNORM, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_SNORM, - DXGI_FORMAT_R16G16B16A16_UNORM, - DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32A32_FLOAT, - - // integer formats - DXGI_FORMAT_R8_UINT, - DXGI_FORMAT_R8_SINT, - DXGI_FORMAT_R16_UINT, - DXGI_FORMAT_R16_SINT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R8G8_UINT, - 
DXGI_FORMAT_R8G8_SINT, - DXGI_FORMAT_R16G16_UINT, - DXGI_FORMAT_R16G16_SINT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R8G8B8A8_UINT, - DXGI_FORMAT_R8G8B8A8_SINT, - DXGI_FORMAT_R16G16B16A16_UINT, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_UNKNOWN, -}; - -DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) +DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer) { - DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer]; + using FormatMap = Common::EnumMap; + static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper + + static constexpr std::array d3d_float_format_lookup = { + f({ + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_SNORM, + DXGI_FORMAT_R16_UNORM, + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R32_FLOAT, + }), + f({ + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R8G8_SNORM, + DXGI_FORMAT_R16G16_UNORM, + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R32G32_FLOAT, + }), + f({ + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R32G32B32_FLOAT, + }), + f({ + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32A32_FLOAT, + }), + }; + + static constexpr std::array d3d_integer_format_lookup = { + f({ + DXGI_FORMAT_R8_UINT, + DXGI_FORMAT_R8_SINT, + DXGI_FORMAT_R16_UINT, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_R8G8_UINT, + DXGI_FORMAT_R8G8_SINT, + DXGI_FORMAT_R16G16_UINT, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + }; + + DXGI_FORMAT retval 
= + integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t]; if (retval == DXGI_FORMAT_UNKNOWN) { PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer); diff --git a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp index d95c8b9f7f..bd818d1a66 100644 --- a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp @@ -4,39 +4,43 @@ #include "VideoBackends/D3D12/DX12VertexFormat.h" #include "Common/Assert.h" +#include "Common/EnumMap.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" namespace DX12 { -static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer) +static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer) { + using ComponentArray = std::array; + static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper + // NOTE: 3-component formats are not valid. 
- static const DXGI_FORMAT float_type_lookup[][4] = { - {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE - {DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, - DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE - {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, - DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT - {DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + static constexpr Common::EnumMap float_type_lookup = { + f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte + f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte + f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort + f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R16G16B16A16_SNORM}), // Short + f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float }; - static const DXGI_FORMAT integer_type_lookup[][4] = { - {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, - DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE - {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, - DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE - {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, - DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT - {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT - {DXGI_FORMAT_R32_FLOAT, 
DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + static constexpr Common::EnumMap integer_type_lookup = { + f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_UINT}), // UByte + f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R8G8B8A8_SINT}), // Byte + f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_UINT}), // UShort + f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R16G16B16A16_SINT}), // Short + f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float }; ASSERT(components > 0 && components <= 4); diff --git a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp index 9a6e568c8d..d21a40ebc1 100644 --- a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/GL/GLUtil.h" #include "Common/MsgHandler.h" @@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) return std::make_unique(vtx_decl); } -static inline GLuint VarToGL(VarType t) +static inline GLuint VarToGL(ComponentFormat t) { - static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, - GL_FLOAT}; + static constexpr Common::EnumMap lookup = { + GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT, + }; return lookup[t]; } diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 5c0362b2c5..af99e5ba85 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ 
b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -36,20 +36,21 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ { DebugUtil::OnObjectBegin(); - u8 primitiveType = 0; + using OpcodeDecoder::Primitive; + Primitive primitive_type = Primitive::GX_DRAW_QUADS; switch (m_current_primitive_type) { case PrimitiveType::Points: - primitiveType = OpcodeDecoder::GX_DRAW_POINTS; + primitive_type = Primitive::GX_DRAW_POINTS; break; case PrimitiveType::Lines: - primitiveType = OpcodeDecoder::GX_DRAW_LINES; + primitive_type = Primitive::GX_DRAW_LINES; break; case PrimitiveType::Triangles: - primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES; + primitive_type = Primitive::GX_DRAW_TRIANGLES; break; case PrimitiveType::TriangleStrip: - primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP; + primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP; break; } @@ -57,7 +58,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ if (g_renderer->IsBBoxEnabled()) g_renderer->BBoxFlush(); - m_setup_unit.Init(primitiveType); + m_setup_unit.Init(primitive_type); // set all states with are stored within video sw for (int i = 0; i < 4; i++) @@ -74,7 +75,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ memset(static_cast(&m_vertex), 0, sizeof(m_vertex)); // parse the videocommon format to our own struct format (m_vertex) - SetFormat(g_main_cp_state.last_id, primitiveType); + SetFormat(); ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index); // transform this vertex so that it can be used for rasterization (outVertex) @@ -98,7 +99,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ DebugUtil::OnObjectEnd(); } -void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) +void SWVertexLoader::SetFormat() { // matrix index from xf regs or cp memory? 
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx || @@ -144,7 +145,7 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f if (format.enable) { src.Skip(format.offset); - src.Skip(base_component * (1 << (format.type >> 1))); + src.Skip(base_component * GetElementSize(format.type)); int i; for (i = 0; i < std::min(format.components - base_component, components); i++) @@ -152,24 +153,24 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f int i_dst = reverse ? components - i - 1 : i; switch (format.type) { - case VAR_UNSIGNED_BYTE: + case ComponentFormat::UByte: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_BYTE: + case ComponentFormat::Byte: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_UNSIGNED_SHORT: + case ComponentFormat::UShort: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_SHORT: + case ComponentFormat::Short: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_FLOAT: + case ComponentFormat::Float: dst[i_dst] = ReadNormalized(src.Read()); break; } - ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT, + ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float, "only non-float values are allowed to be streamed as integer"); } for (; i < components; i++) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index 4bc9f67ca8..bbda8da037 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -22,7 +22,7 @@ public: protected: void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; - void SetFormat(u8 attributeIndex, u8 primitiveType); + void SetFormat(); void ParseVertex(const PortableVertexDeclaration& vdec, int index); InputVertexData m_vertex{}; diff --git a/Source/Core/VideoBackends/Software/SetupUnit.cpp 
b/Source/Core/VideoBackends/Software/SetupUnit.cpp index b2488a63d8..48ab2b2d9b 100644 --- a/Source/Core/VideoBackends/Software/SetupUnit.cpp +++ b/Source/Core/VideoBackends/Software/SetupUnit.cpp @@ -9,9 +9,9 @@ #include "VideoBackends/Software/Clipper.h" #include "VideoCommon/OpcodeDecoding.h" -void SetupUnit::Init(u8 primitiveType) +void SetupUnit::Init(OpcodeDecoder::Primitive primitive_type) { - m_PrimType = primitiveType; + m_PrimType = primitive_type; m_VertexCounter = 0; m_VertPointer[0] = &m_Vertices[0]; @@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex() void SetupUnit::SetupVertex() { + using OpcodeDecoder::Primitive; switch (m_PrimType) { - case OpcodeDecoder::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS: SetupQuad(); break; - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS_2: WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2"); SetupQuad(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: SetupTriangle(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: SetupTriStrip(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: SetupTriFan(); break; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: SetupLine(); break; - case OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: SetupLineStrip(); break; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: SetupPoint(); break; } diff --git a/Source/Core/VideoBackends/Software/SetupUnit.h b/Source/Core/VideoBackends/Software/SetupUnit.h index a9f9584e07..e454c73ff3 100644 --- a/Source/Core/VideoBackends/Software/SetupUnit.h +++ b/Source/Core/VideoBackends/Software/SetupUnit.h @@ -6,9 +6,14 @@ #include "Common/CommonTypes.h" #include "VideoBackends/Software/NativeVertexFormat.h" +namespace OpcodeDecoder +{ +enum class Primitive : u8; +} + class SetupUnit { - u8 m_PrimType = 0; + 
OpcodeDecoder::Primitive m_PrimType{}; int m_VertexCounter = 0; OutputVertexData m_Vertices[3]; @@ -24,7 +29,7 @@ class SetupUnit void SetupPoint(); public: - void Init(u8 primitiveType); + void Init(OpcodeDecoder::Primitive primitive_type); OutputVertexData* GetVertex(); diff --git a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp index e06beafbb8..5f53547066 100644 --- a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp @@ -4,6 +4,7 @@ #include "VideoBackends/Vulkan/VKVertexFormat.h" #include "Common/Assert.h" +#include "Common/EnumMap.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" @@ -13,32 +14,35 @@ namespace Vulkan { -static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) +static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool integer) { - static const VkFormat float_type_lookup[][4] = { - {VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, - VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE - {VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, - VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE - {VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, - VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT - {VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, - VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT - {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + using ComponentArray = std::array; + static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper + + static constexpr Common::EnumMap float_type_lookup = { + f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM}), // UByte + f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, + 
VK_FORMAT_R8G8B8A8_SNORM}), // Byte + f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, + VK_FORMAT_R16G16B16A16_UNORM}), // UShort + f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM}), // Short + f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT}), // Float }; - static const VkFormat integer_type_lookup[][4] = { - {VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, - VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE - {VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, - VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE - {VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, - VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT - {VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, - VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT - {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + static constexpr Common::EnumMap integer_type_lookup = { + f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, + VK_FORMAT_R8G8B8A8_UINT}), // UByte + f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, + VK_FORMAT_R8G8B8A8_SINT}), // Byte + f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, + VK_FORMAT_R16G16B16A16_UINT}), // UShort + f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, + VK_FORMAT_R16G16B16A16_SINT}), // Short + f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT}), // Float }; ASSERT(components > 0 && components <= 4); diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index fdd32746a9..098e72d5c0 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -258,7 +258,7 @@ enum class TevBias : u32 { Zero = 0, AddHalf = 1, - 
Subhalf = 2, + SubHalf = 2, Compare = 3 }; template <> @@ -491,6 +491,94 @@ struct fmt::formatter template auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx) { + auto out = ctx.out(); + if (cc.bias != TevBias::Compare) + { + // Generate an equation view, simplifying out addition of zero and multiplication by 1 + // dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale + // or equivalently and more readably when the terms are not constants: + // dest = (d (OP) lerp(a, b, c) + bias) * scale + // Note that lerping is more complex than the first form shows; see PixelShaderGen's + // WriteTevRegular for more details. + + static constexpr Common::EnumMap alt_names = { + "prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa", + "tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0", + }; + + const bool has_d = cc.d != TevColorArg::Zero; + // If c is one, (1 - c) is zero, so (1-c)*a is zero + const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One; + // If either b or c is zero, b*c is zero + const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero; + const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known + const bool has_scale = cc.scale != TevScale::Scale1; + + const char op = (cc.op == TevOp::Sub ? '-' : '+'); + + if (cc.dest == TevOutput::Prev) + out = format_to(out, "dest.rgb = "); + else + out = format_to(out, "{:n}.rgb = ", cc.dest); + + if (has_scale) + out = format_to(out, "("); + if (has_d) + out = format_to(out, "{}", alt_names[cc.d]); + if (has_ac || has_bc) + { + if (has_d) + out = format_to(out, " {} ", op); + else if (cc.op == TevOp::Sub) + out = format_to(out, "{}", op); + if (has_ac && has_bc) + { + if (cc.c == TevColorArg::Half) + { + // has_ac and has_bc imply that c is not Zero or One, and Half is the only remaining + // numeric constant. This results in an average. 
+ out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]); + } + else + { + out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b], + alt_names[cc.c]); + } + } + else if (has_ac) + { + if (cc.c == TevColorArg::Zero) + out = format_to(out, "{}", alt_names[cc.a]); + else if (cc.c == TevColorArg::Half) // 1 - .5 is .5 + out = format_to(out, ".5*{}", alt_names[cc.a]); + else + out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]); + } + else // has_bc + { + if (cc.c == TevColorArg::One) + out = format_to(out, "{}", alt_names[cc.b]); + else + out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]); + } + } + if (has_bias) + { + if (has_ac || has_bc || has_d) + out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5"); + else + out = format_to(out, cc.bias == TevBias::AddHalf ? ".5" : "-.5"); + } + else + { + // If nothing has been written so far, add a zero + if (!(has_ac || has_bc || has_d)) + out = format_to(out, "0"); + } + if (has_scale) + out = format_to(out, ") * {:n}", cc.scale); + out = format_to(out, "\n\n"); + } return format_to(ctx.out(), "a: {}\n" "b: {}\n" @@ -512,7 +600,80 @@ struct fmt::formatter template auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx) { - return format_to(ctx.out(), + auto out = ctx.out(); + if (ac.bias != TevBias::Compare) + { + // Generate an equation view, simplifying out addition of zero and multiplication by 1 + // dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale + // or equivalently and more readably when the terms are not constants: + // dest = (d (OP) lerp(a, b, c) + bias) * scale + // Note that lerping is more complex than the first form shows; see PixelShaderGen's + // WriteTevRegular for more details. + + // We don't need an alt_names map here, unlike the color combiner, as the only special term is + // Zero, and we filter that out below. 
However, we do need to append ".a" to all + // parameters, to make it explicit that these are operations on the alpha term instead of the + // 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown. + + const bool has_d = ac.d != TevAlphaArg::Zero; + // There is no c value for alpha that results in (1 - c) always being zero + const bool has_ac = ac.a != TevAlphaArg::Zero; + // If either b or c is zero, b*c is zero + const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero; + const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known + const bool has_scale = ac.scale != TevScale::Scale1; + + const char op = (ac.op == TevOp::Sub ? '-' : '+'); + + if (ac.dest == TevOutput::Prev) + out = format_to(out, "dest.a = "); + else + out = format_to(out, "{:n}.a = ", ac.dest); + + if (has_scale) + out = format_to(out, "("); + if (has_d) + out = format_to(out, "{:n}.a", ac.d); + if (has_ac || has_bc) + { + if (has_d) + out = format_to(out, " {} ", op); + else if (ac.op == TevOp::Sub) + out = format_to(out, "{}", op); + if (has_ac && has_bc) + { + out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c); + } + else if (has_ac) + { + if (ac.c == TevAlphaArg::Zero) + out = format_to(out, "{:n}.a", ac.a); + else + out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a); + } + else // has_bc + { + out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b); + } + } + if (has_bias) + { + if (has_ac || has_bc || has_d) + out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5"); + else + out = format_to(out, ac.bias == TevBias::AddHalf ? 
".5" : "-.5"); + } + else + { + // If nothing has been written so far, add a zero + if (!(has_ac || has_bc || has_d)) + out = format_to(out, "0"); + } + if (has_scale) + out = format_to(out, ") * {:n}", ac.scale); + out = format_to(out, "\n\n"); + } + return format_to(out, "a: {}\n" "b: {}\n" "c: {}\n" @@ -756,14 +917,14 @@ struct fmt::formatter : EnumFormatter formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {} }; -enum class MaxAnsio +enum class MaxAniso { One = 0, Two = 1, Four = 2, }; template <> -struct fmt::formatter : EnumFormatter +struct fmt::formatter : EnumFormatter { formatter() : EnumFormatter({"1", "2", "4"}) {} }; @@ -777,7 +938,7 @@ union TexMode0 BitField<7, 1, FilterMode> min_filter; BitField<8, 1, LODType> diag_lod; BitField<9, 8, s32> lod_bias; - BitField<19, 2, MaxAnsio> max_aniso; + BitField<19, 2, MaxAniso> max_aniso; BitField<21, 1, bool, u32> lod_clamp; u32 hex; }; @@ -2205,7 +2366,7 @@ struct BPMemory extern BPMemory bpmem; -void LoadBPReg(u32 value0, int cycles_into_future); -void LoadBPRegPreprocess(u32 value0, int cycles_into_future); +void LoadBPReg(u8 reg, u32 value, int cycles_into_future); +void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future); std::pair GetBPRegInfo(u8 cmd, u32 cmddata); diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 0fc4ca6785..503ef6154f 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) bp.newvalue); } -// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg() -void LoadBPReg(u32 value0, int cycles_into_future) +// Call browser: OpcodeDecoding.cpp RunCallback::OnBP() +void LoadBPReg(u8 reg, u32 value, int cycles_into_future) { - int regNum = value0 >> 24; - int oldval = ((u32*)&bpmem)[regNum]; - int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask); + int oldval = 
((u32*)&bpmem)[reg]; + int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask); int changes = (oldval ^ newval) & 0xFFFFFF; - BPCmd bp = {regNum, changes, newval}; + BPCmd bp = {reg, changes, newval}; // Reset the mask register if we're not trying to set it ourselves. - if (regNum != BPMEM_BP_MASK) + if (reg != BPMEM_BP_MASK) bpmem.bpMask = 0xFFFFFF; BPWritten(bp, cycles_into_future); } -void LoadBPRegPreprocess(u32 value0, int cycles_into_future) +void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future) { - int regNum = value0 >> 24; - // masking could hypothetically be a problem - u32 newval = value0 & 0xffffff; - switch (regNum) + // masking via BPMEM_BP_MASK could hypothetically be a problem + u32 newval = value & 0xffffff; + switch (reg) { case BPMEM_SETDRAWDONE: if ((newval & 0xff) == 0x02) diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index 1184e10ca3..0df0d9f1d8 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -2,7 +2,14 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoCommon/CPMemory.h" + +#include + #include "Common/ChunkFile.h" +#include "Common/Logging/Log.h" +#include "Core/DolphinAnalytics.h" +#include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/VertexLoaderManager.h" // CP state CPState g_main_cp_state; @@ -22,13 +29,13 @@ void DoCPState(PointerWrap& p) if (p.mode == PointerWrap::MODE_READ) { CopyPreprocessCPStateFromMain(); - g_main_cp_state.bases_dirty = true; + VertexLoaderManager::g_bases_dirty = true; } } void CopyPreprocessCPStateFromMain() { - memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState)); + std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState)); } std::pair GetCPRegInfo(u8 cmd, u32 value) @@ -62,12 +69,167 @@ std::pair GetCPRegInfo(u8 cmd, u32 value) return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK), fmt::to_string(UVAT_group2{.Hex = value})); 
case ARRAY_BASE: - return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK), - fmt::format("Base address {:08x}", value)); + return std::make_pair( + fmt::format("ARRAY_BASE Array {}", static_cast(cmd & CP_ARRAY_MASK)), + fmt::format("Base address {:08x}", value)); case ARRAY_STRIDE: - return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE), - fmt::format("Stride {:02x}", value & 0xff)); + return std::make_pair( + fmt::format("ARRAY_STRIDE Array {}", static_cast(cmd & CP_ARRAY_MASK)), + fmt::format("Stride {:02x}", value & 0xff)); default: return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), ""); } } + +CPState::CPState(const u32* memory) : CPState() +{ + matrix_index_a.Hex = memory[MATINDEX_A]; + matrix_index_b.Hex = memory[MATINDEX_B]; + vtx_desc.low.Hex = memory[VCD_LO]; + vtx_desc.high.Hex = memory[VCD_HI]; + + for (u32 i = 0; i < CP_NUM_VAT_REG; i++) + { + vtx_attr[i].g0.Hex = memory[CP_VAT_REG_A + i]; + vtx_attr[i].g1.Hex = memory[CP_VAT_REG_B + i]; + vtx_attr[i].g2.Hex = memory[CP_VAT_REG_C + i]; + } + + for (u32 i = 0; i < CP_NUM_ARRAYS; i++) + { + array_bases[static_cast(i)] = memory[ARRAY_BASE + i]; + array_strides[static_cast(i)] = memory[ARRAY_STRIDE + i]; + } +} + +void CPState::LoadCPReg(u8 sub_cmd, u32 value) +{ + switch (sub_cmd & CP_COMMAND_MASK) + { + case UNKNOWN_00: + case UNKNOWN_10: + case UNKNOWN_20: + if (!(sub_cmd == UNKNOWN_20 && value == 0)) + { + // All titles using libogc or the official SDK issue 0x20 with value=0 on startup + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND); + DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}", + sub_cmd); + } + break; + + case MATINDEX_A: + if (sub_cmd != MATINDEX_A) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP MATINDEX_A: an exact value of {:02x} was expected " + "but instead a 
value of {:02x} was seen", + MATINDEX_A, sub_cmd); + } + + matrix_index_a.Hex = value; + break; + + case MATINDEX_B: + if (sub_cmd != MATINDEX_B) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP MATINDEX_B: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + MATINDEX_B, sub_cmd); + } + + matrix_index_b.Hex = value; + break; + + case VCD_LO: + if (sub_cmd != VCD_LO) // Stricter than YAGCD + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP VCD_LO: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + VCD_LO, sub_cmd); + } + + vtx_desc.low.Hex = value; + break; + + case VCD_HI: + if (sub_cmd != VCD_HI) // Stricter than YAGCD + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP VCD_HI: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + VCD_HI, sub_cmd); + } + + vtx_desc.high.Hex = value; + break; + + case CP_VAT_REG_A: + if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A); + } + vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value; + break; + + case CP_VAT_REG_B: + if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B); + } + vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value; + break; + + case CP_VAT_REG_C: + if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C); + } + 
vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value; + break; + + // Pointers to vertex arrays in GC RAM + case ARRAY_BASE: + array_bases[static_cast(sub_cmd & CP_ARRAY_MASK)] = + value & CommandProcessor::GetPhysicalAddressMask(); + break; + + case ARRAY_STRIDE: + array_strides[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF; + break; + + default: + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value); + } +} + +void CPState::FillCPMemoryArray(u32* memory) const +{ + memory[MATINDEX_A] = matrix_index_a.Hex; + memory[MATINDEX_B] = matrix_index_b.Hex; + memory[VCD_LO] = vtx_desc.low.Hex; + memory[VCD_HI] = vtx_desc.high.Hex; + + for (int i = 0; i < CP_NUM_VAT_REG; ++i) + { + memory[CP_VAT_REG_A + i] = vtx_attr[i].g0.Hex; + memory[CP_VAT_REG_B + i] = vtx_attr[i].g1.Hex; + memory[CP_VAT_REG_C + i] = vtx_attr[i].g2.Hex; + } + + for (int i = 0; i < CP_NUM_ARRAYS; ++i) + { + memory[ARRAY_BASE + i] = array_bases[static_cast(i)]; + memory[ARRAY_STRIDE + i] = array_strides[static_cast(i)]; + } +} diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 81d0316cec..52b7038e7c 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -5,12 +5,14 @@ #include #include +#include #include #include "Common/BitField.h" #include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/EnumFormatter.h" +#include "Common/EnumMap.h" #include "Common/MsgHandler.h" enum @@ -53,24 +55,46 @@ enum }; // Vertex array numbers -enum +enum class CPArray : u8 { - ARRAY_POSITION = 0, - ARRAY_NORMAL = 1, - ARRAY_COLOR0 = 2, - NUM_COLOR_ARRAYS = 2, - ARRAY_TEXCOORD0 = 4, - NUM_TEXCOORD_ARRAYS = 8, + Position = 0, + Normal = 1, - ARRAY_XF_A = 12, // Usually used for position matrices - ARRAY_XF_B = 13, // Usually used for normal matrices - ARRAY_XF_C = 14, // Usually used for tex coord matrices - ARRAY_XF_D = 15, 
// Usually used for light objects + Color0 = 2, + Color1 = 3, - // Number of arrays related to vertex components (position, normal, color, tex coord) - // Excludes the 4 arrays used for indexed XF loads - NUM_VERTEX_COMPONENT_ARRAYS = 12, + TexCoord0 = 4, + TexCoord1 = 5, + TexCoord2 = 6, + TexCoord3 = 7, + TexCoord4 = 8, + TexCoord5 = 9, + TexCoord6 = 10, + TexCoord7 = 11, + + XF_A = 12, // Usually used for position matrices + XF_B = 13, // Usually used for normal matrices + XF_C = 14, // Usually used for tex coord matrices + XF_D = 15, // Usually used for light objects }; +template <> +struct fmt::formatter : EnumFormatter +{ + static constexpr array_type names = {"Position", "Normal", "Color 0", "Color 1", + "Tex Coord 0", "Tex Coord 1", "Tex Coord 2", "Tex Coord 3", + "Tex Coord 4", "Tex Coord 5", "Tex Coord 6", "Tex Coord 7", + "XF A", "XF B", "XF C", "XF D"}; + formatter() : EnumFormatter(names) {} +}; +// Intended for offsetting from Color0/TexCoord0 +constexpr CPArray operator+(CPArray array, u8 offset) +{ + return static_cast(static_cast(array) + offset); +} + +// Number of arrays related to vertex components (position, normal, color, tex coord) +// Excludes the 4 arrays used for indexed XF loads +constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12; // Vertex components enum class VertexComponentFormat @@ -607,32 +631,29 @@ class VertexLoaderBase; // STATE_TO_SAVE struct CPState final { - u32 array_bases[CP_NUM_ARRAYS]{}; - u32 array_strides[CP_NUM_ARRAYS]{}; + CPState() = default; + explicit CPState(const u32* memory); + + // Mutates the CP state based on the given command and value. + void LoadCPReg(u8 sub_cmd, u32 value); + // Fills memory with data from CP regs. There should be space for 0x100 values in memory. 
+ void FillCPMemoryArray(u32* memory) const; + + Common::EnumMap array_bases; + Common::EnumMap array_strides; TMatrixIndexA matrix_index_a{}; TMatrixIndexB matrix_index_b{}; TVtxDesc vtx_desc; // Most games only use the first VtxAttr and simply reconfigure it all the time as needed. - VAT vtx_attr[CP_NUM_VAT_REG]{}; - - // Attributes that actually belong to VertexLoaderManager: - BitSet32 attr_dirty{}; - bool bases_dirty = false; - VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{}; - int last_id = 0; + std::array vtx_attr{}; }; +static_assert(std::is_trivially_copyable_v); class PointerWrap; extern CPState g_main_cp_state; extern CPState g_preprocess_cp_state; -// Might move this into its own file later. -void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false); - -// Fills memory with data from CP regs -void FillCPMemoryArray(u32* memory); - void DoCPState(PointerWrap& p); void CopyPreprocessCPStateFromMain(); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 83784a137f..ddaa0e72a3 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "Common/Assert.h" #include "Common/ChunkFile.h" @@ -607,10 +608,10 @@ void SetCpClearRegister() { } -void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) +void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess) { // TODO(Omega): Maybe dump FIFO to file on this error - PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n" + PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n" "This means one of the following:\n" "* The emulated GPU got desynced, disabling dual core can help\n" "* Command stream corrupted by some spurious memory bug\n" @@ -618,7 +619,7 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) "* Some other sort of bug\n\n" "Further errors will be sent to the 
Video Backend log and\n" "Dolphin will now likely crash or hang. Enjoy.", - cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false"); + cmd_byte, fmt::ptr(buffer), preprocess); { PanicAlertFmt("Illegal command {:02x}\n" diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 4ca73c71d2..2da7f1c84d 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -169,7 +169,7 @@ void SetCpClearRegister(); void SetCpControlRegister(); void SetCpStatusRegister(); -void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess); +void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess); u32 GetPhysicalAddressMask(); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 04fc00d33a..f96c71b550 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) } } Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); - s_video_buffer_pp_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); + s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr); // This would have to be locked if the GPU thread didn't spin. 
s_video_buffer_write_ptr = write_ptr + len; } @@ -316,7 +316,7 @@ void RunGpuLoop() if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = - OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); + OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr); s_video_buffer_seen_ptr = write_ptr; } } @@ -349,8 +349,8 @@ void RunGpuLoop() fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32); u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); + s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted); fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst); @@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks) } ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); u32 cycles = 0; - s_video_buffer_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); + s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles); available_ticks -= cycles; } diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 211adda800..43c213f8ba 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines() { PortableVertexDeclaration vtx_decl = {}; vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; + vtx_decl.position.type = ComponentFormat::Float; vtx_decl.position.components = 4; vtx_decl.position.integer = false; vtx_decl.position.offset = offsetof(EFBPokeVertex, position); vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; + 
vtx_decl.colors[0].type = ComponentFormat::UByte; vtx_decl.colors[0].components = 4; vtx_decl.colors[0].integer = false; vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index c8b4bdc261..4108a6efd6 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -6,25 +6,29 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -constexpr std::array primitives_ogl{ +constexpr Common::EnumMap primitives_ogl{ "points", "lines", "triangles", "triangles", }; -constexpr std::array primitives_d3d{ +constexpr Common::EnumMap primitives_d3d{ "point", "line", "triangle", "triangle", }; +constexpr Common::EnumMap vertex_in_map{1u, 2u, 3u, 3u}; +constexpr Common::EnumMap vertex_out_map{4u, 4u, 4u, 3u}; + bool geometry_shader_uid_data::IsPassthrough() const { const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off; @@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& const bool ssaa = host_config.ssaa; const bool stereo = host_config.stereo; const auto primitive_type = static_cast(uid_data->primitive_type); - const auto primitive_type_index = static_cast(uid_data->primitive_type); - const auto vertex_in = std::min(static_cast(primitive_type_index) + 1, 3u); - u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 
3 : 4; + const u32 vertex_in = vertex_in_map[primitive_type]; + u32 vertex_out = vertex_out_map[primitive_type]; if (wireframe) vertex_out++; @@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& // Insert layout parameters if (host_config.backend_gs_instancing) { - out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index], + out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type], stereo ? 2 : 1); out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", vertex_out); } else { - out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]); + out.Write("layout({}) in;\n", primitives_ogl[primitive_type]); out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", stereo ? vertex_out * 2 : vertex_out); } @@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1); out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output, in uint " "InstanceID : SV_GSInstanceID)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); + primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle"); } else { out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out); out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); + primitives_d3d[primitive_type], vertex_in, wireframe ? 
"Line" : "Triangle"); } out.Write("\tVertexData ps;\n"); diff --git a/Source/Core/VideoCommon/IndexGenerator.cpp b/Source/Core/VideoCommon/IndexGenerator.cpp index d71f6c8292..be2dc99e3a 100644 --- a/Source/Core/VideoCommon/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/IndexGenerator.cpp @@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) void IndexGenerator::Init() { + using OpcodeDecoder::Primitive; + if (g_Config.backend_info.bSupportsPrimitiveRestart) { - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads; - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan; + m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads; + m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard; + m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } else { - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads; - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan; + m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads; + m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard; + m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } - m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList; - m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip; - 
m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints; + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; } void IndexGenerator::Start(u16* index_ptr) @@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr) m_base_index = 0; } -void IndexGenerator::AddIndices(int primitive, u32 num_vertices) +void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices) { m_index_buffer_current = m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index); diff --git a/Source/Core/VideoCommon/IndexGenerator.h b/Source/Core/VideoCommon/IndexGenerator.h index 00c8f73132..32cf21e207 100644 --- a/Source/Core/VideoCommon/IndexGenerator.h +++ b/Source/Core/VideoCommon/IndexGenerator.h @@ -6,8 +6,9 @@ #pragma once -#include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" +#include "VideoCommon/OpcodeDecoding.h" class IndexGenerator { @@ -15,7 +16,7 @@ public: void Init(); void Start(u16* index_ptr); - void AddIndices(int primitive, u32 num_vertices); + void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices); @@ -30,5 +31,5 @@ private: u32 m_base_index = 0; using PrimitiveFunction = u16* (*)(u16*, u32, u32); - std::array m_primitive_table{}; + Common::EnumMap m_primitive_table{}; }; diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index e4aa0f7e61..7bbf0bd38c 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -8,6 +8,7 @@ #include "Common/CommonTypes.h" #include "Common/Hash.h" +#include "VideoCommon/CPMemory.h" // m_components enum @@ -45,18 +46,9 @@ enum VB_HAS_UVTEXMTXSHIFT = 13, }; -enum VarType -{ - VAR_UNSIGNED_BYTE, // GX_U8 = 0 - VAR_BYTE, // GX_S8 = 1 - 
VAR_UNSIGNED_SHORT, // GX_U16 = 2 - VAR_SHORT, // GX_S16 = 3 - VAR_FLOAT, // GX_F32 = 4 -}; - struct AttributeFormat { - VarType type; + ComponentFormat type; int components; int offset; bool enable; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index a1abacc4c6..63734db384 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -14,7 +14,7 @@ #include "VideoCommon/OpcodeDecoding.h" -#include "Common/CommonTypes.h" +#include "Common/Assert.h" #include "Common/Logging/Log.h" #include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" @@ -24,55 +24,15 @@ #include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/XFMemory.h" +#include "VideoCommon/XFStructs.h" namespace OpcodeDecoder { -namespace -{ -bool s_is_fifo_error_seen = false; - -u32 InterpretDisplayList(u32 address, u32 size) -{ - u8* start_address; - - if (Fifo::UseDeterministicGPUThread()) - start_address = static_cast(Fifo::PopFifoAuxBuffer(size)); - else - start_address = Memory::GetPointer(address); - - u32 cycles = 0; - - // Avoid the crash if Memory::GetPointer failed .. 
- if (start_address != nullptr) - { - // temporarily swap dl and non-dl (small "hack" for the stats) - g_stats.SwapDL(); - - Run(DataReader(start_address, start_address + size), &cycles, true); - INCSTAT(g_stats.this_frame.num_dlists_called); - - // un-swap - g_stats.SwapDL(); - } - - return cycles; -} - -void InterpretDisplayListPreprocess(u32 address, u32 size) -{ - u8* const start_address = Memory::GetPointer(address); - - Fifo::PushFifoAuxBuffer(start_address, size); - - if (start_address == nullptr) - return; - - Run(DataReader(start_address, start_address + size), nullptr, true); -} -} // Anonymous namespace - +static bool s_is_fifo_error_seen = false; bool g_record_fifo_data = false; void Init() @@ -81,202 +41,228 @@ void Init() } template -u8* Run(DataReader src, u32* cycles, bool in_display_list) +class RunCallback final : public Callback { - u32 total_cycles = 0; - u8* opcode_start = nullptr; - - const auto finish_up = [cycles, &opcode_start, &total_cycles] { - if (cycles != nullptr) - { - *cycles = total_cycles; - } - return opcode_start; - }; - - while (true) +public: + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) { - opcode_start = src.GetPointer(); + m_cycles += 18 + 6 * count; - if (!src.size()) - return finish_up(); - - const u8 cmd_byte = src.Read(); - switch (cmd_byte) - { - case GX_NOP: - total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly... 
- break; - - case GX_UNKNOWN_RESET: - total_cycles += 6; // Datel software uses this command - DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte); - break; - - case GX_LOAD_CP_REG: - { - if (src.size() < 1 + 4) - return finish_up(); - - total_cycles += 12; - - const u8 sub_cmd = src.Read(); - const u32 value = src.Read(); - LoadCPReg(sub_cmd, value, is_preprocess); - if constexpr (!is_preprocess) - INCSTAT(g_stats.this_frame.num_cp_loads); - } - break; - - case GX_LOAD_XF_REG: - { - if (src.size() < 4) - return finish_up(); - - const u32 cmd2 = src.Read(); - const u32 transfer_size = ((cmd2 >> 16) & 15) + 1; - if (src.size() < transfer_size * sizeof(u32)) - return finish_up(); - - total_cycles += 18 + 6 * transfer_size; - - if constexpr (!is_preprocess) - { - const u32 xf_address = cmd2 & 0xFFFF; - LoadXFReg(transfer_size, xf_address, src); - - INCSTAT(g_stats.this_frame.num_xf_loads); - } - src.Skip(transfer_size); - } - break; - - case GX_LOAD_INDX_A: // Used for position matrices - case GX_LOAD_INDX_B: // Used for normal matrices - case GX_LOAD_INDX_C: // Used for postmatrices - case GX_LOAD_INDX_D: // Used for lights - { - if (src.size() < 4) - return finish_up(); - - total_cycles += 6; - - // Map the command byte to its ref array. 
- // GX_LOAD_INDX_A (32) -> 0xC - // GX_LOAD_INDX_B (40) -> 0xD - // GX_LOAD_INDX_C (48) -> 0xE - // GX_LOAD_INDX_D (56) -> 0xF - const int ref_array = (cmd_byte / 8) + 8; - - if constexpr (is_preprocess) - PreprocessIndexedXF(src.Read(), ref_array); - else - LoadIndexedXF(src.Read(), ref_array); - } - break; - - case GX_CMD_CALL_DL: - { - if (src.size() < 8) - return finish_up(); - - const u32 address = src.Read(); - const u32 count = src.Read(); - - if (in_display_list) - { - total_cycles += 6; - INFO_LOG_FMT(VIDEO, "recursive display list detected"); - } - else - { - if constexpr (is_preprocess) - InterpretDisplayListPreprocess(address, count); - else - total_cycles += 6 + InterpretDisplayList(address, count); - } - } - break; - - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after - // that - total_cycles += 6; - DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte); - break; - - case GX_CMD_INVL_VC: // Invalidate Vertex Cache - total_cycles += 6; - DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)"); - break; - - case GX_LOAD_BP_REG: - // In skipped_frame case: We have to let BP writes through because they set - // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. 
- { - if (src.size() < 4) - return finish_up(); - - total_cycles += 12; - - const u32 bp_cmd = src.Read(); - if constexpr (is_preprocess) - { - LoadBPRegPreprocess(bp_cmd, total_cycles); - } - else - { - LoadBPReg(bp_cmd, total_cycles); - INCSTAT(g_stats.this_frame.num_bp_loads); - } - } - break; - - // draw primitives - default: - if ((cmd_byte & 0xC0) == 0x80) - { - // load vertices - if (src.size() < 2) - return finish_up(); - - const u16 num_vertices = src.Read(); - const int bytes = VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src, is_preprocess); - - if (bytes < 0) - return finish_up(); - - src.Skip(bytes); - - // 4 GPU ticks per vertex, 3 CPU ticks per GPU tick - total_cycles += num_vertices * 4 * 3 + 6; - } - else - { - if (!s_is_fifo_error_seen) - CommandProcessor::HandleUnknownOpcode(cmd_byte, opcode_start, is_preprocess); - ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", cmd_byte, - fmt::ptr(opcode_start), is_preprocess ? 
"yes" : "no"); - s_is_fifo_error_seen = true; - total_cycles += 1; - } - break; - } - - // Display lists get added directly into the FIFO stream if constexpr (!is_preprocess) { - if (g_record_fifo_data && cmd_byte != GX_CMD_CALL_DL) + LoadXFReg(address, count, data); + + INCSTAT(g_stats.this_frame.num_xf_loads); + } + } + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) + { + m_cycles += 12; + const u8 sub_command = command & CP_COMMAND_MASK; + if constexpr (!is_preprocess) + { + if (sub_command == MATINDEX_A) + VertexShaderManager::SetTexMatrixChangedA(value); + else if (sub_command == MATINDEX_B) + VertexShaderManager::SetTexMatrixChangedB(value); + else if (sub_command == VCD_LO || sub_command == VCD_HI) { - const u8* const opcode_end = src.GetPointer(); - FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start)); + VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG); + VertexLoaderManager::g_bases_dirty = true; + } + else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B || + sub_command == CP_VAT_REG_C) + { + VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true; + } + else if (sub_command == ARRAY_BASE) + { + VertexLoaderManager::g_bases_dirty = true; + } + + INCSTAT(g_stats.this_frame.num_cp_loads); + } + else if constexpr (is_preprocess) + { + if (sub_command == VCD_LO || sub_command == VCD_HI) + { + VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG); + } + else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B || + sub_command == CP_VAT_REG_C) + { + VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true; + } + } + GetCPState().LoadCPReg(command, value); + } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + m_cycles += 12; + + if constexpr (is_preprocess) + { + LoadBPRegPreprocess(command, value, m_cycles); + } + else + { + LoadBPReg(command, value, m_cycles); + INCSTAT(g_stats.this_frame.num_bp_loads); + } + } 
+ OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + m_cycles += 6; + + if constexpr (is_preprocess) + PreprocessIndexedXF(array, index, address, size); + else + LoadIndexedXF(array, index, address, size); + } + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + // load vertices + const u32 size = vertex_size * num_vertices; + + // HACK + DataReader src{const_cast(vertex_data), const_cast(vertex_data) + size}; + const u32 bytes = + VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess); + + ASSERT(bytes == size); + + // 4 GPU ticks per vertex, 3 CPU ticks per GPU tick + m_cycles += num_vertices * 4 * 3 + 6; + } + // This can't be inlined since it calls Run, which makes it recursive + // m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit + // to inlining Run for the display list directly. + OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size)) + { + m_cycles += 6; + + if (m_in_display_list) + { + WARN_LOG_FMT(VIDEO, "recursive display list detected"); + } + else + { + m_in_display_list = true; + + if constexpr (is_preprocess) + { + const u8* const start_address = Memory::GetPointer(address); + + Fifo::PushFifoAuxBuffer(start_address, size); + + if (start_address != nullptr) + { + Run(start_address, size, *this); + } + } + else + { + const u8* start_address; + + if (Fifo::UseDeterministicGPUThread()) + start_address = static_cast(Fifo::PopFifoAuxBuffer(size)); + else + start_address = Memory::GetPointer(address); + + // Avoid the crash if Memory::GetPointer failed .. 
+ if (start_address != nullptr) + { + // temporarily swap dl and non-dl (small "hack" for the stats) + g_stats.SwapDL(); + + Run(start_address, size, *this); + INCSTAT(g_stats.this_frame.num_dlists_called); + + // un-swap + g_stats.SwapDL(); + } + } + + m_in_display_list = false; + } + } + OPCODE_CALLBACK(void OnNop(u32 count)) + { + m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly... + } + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + if (static_cast(opcode) == Opcode::GX_UNKNOWN_RESET) + { + // Datel software uses this command + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "GX Reset?"); + } + else if (static_cast(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS) + { + // 'Zelda Four Swords' calls it and checks the metrics registers after that + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "GX 0x44"); + } + else if (static_cast(opcode) == Opcode::GX_CMD_INVL_VC) + { + // Invalidate Vertex Cache + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)"); + } + else + { + if (!s_is_fifo_error_seen) + CommandProcessor::HandleUnknownOpcode(opcode, data, is_preprocess); + ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", opcode, + fmt::ptr(data), is_preprocess ? "yes" : "no"); + s_is_fifo_error_seen = true; + m_cycles += 1; + } + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) + { + ASSERT(size >= 1); + if constexpr (!is_preprocess) + { + // Display lists get added directly into the FIFO stream since this same callback is used to + // process them. 
+ if (g_record_fifo_data && static_cast(data[0]) != Opcode::GX_CMD_CALL_DL) + { + FifoRecorder::GetInstance().WriteGPCommand(data, size); } } } + + OPCODE_CALLBACK(CPState& GetCPState()) + { + if constexpr (is_preprocess) + return g_preprocess_cp_state; + else + return g_main_cp_state; + } + + u32 m_cycles = 0; + bool m_in_display_list = false; +}; + +template +u8* RunFifo(DataReader src, u32* cycles) +{ + using CallbackT = RunCallback; + auto callback = CallbackT{}; + u32 size = Run(src.GetPointer(), static_cast(src.size()), callback); + + if (cycles != nullptr) + *cycles = callback.m_cycles; + + src.Skip(size); + return src.GetPointer(); } -template u8* Run(DataReader src, u32* cycles, bool in_display_list); -template u8* Run(DataReader src, u32* cycles, bool in_display_list); +template u8* RunFifo(DataReader src, u32* cycles); +template u8* RunFifo(DataReader src, u32* cycles); } // namespace OpcodeDecoder diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index 98e5a292de..2d1632efc3 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -3,8 +3,17 @@ #pragma once -#include "Common/CommonTypes.h" +#include +#include "Common/Assert.h" +#include "Common/CommonTypes.h" +#include "Common/EnumFormatter.h" +#include "Common/Inline.h" +#include "Common/Swap.h" +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/VertexLoaderBase.h" + +struct CPState; class DataReader; namespace OpcodeDecoder @@ -12,7 +21,7 @@ namespace OpcodeDecoder // Global flag to signal if FifoRecorder is active. 
extern bool g_record_fifo_data; -enum +enum class Opcode { GX_NOP = 0x00, GX_UNKNOWN_RESET = 0x01, @@ -27,20 +36,20 @@ enum GX_CMD_CALL_DL = 0x40, GX_CMD_UNKNOWN_METRICS = 0x44, - GX_CMD_INVL_VC = 0x48 + GX_CMD_INVL_VC = 0x48, + + GX_PRIMITIVE_START = 0x80, + GX_PRIMITIVE_END = 0xbf, }; -enum -{ - GX_PRIMITIVE_MASK = 0x78, - GX_PRIMITIVE_SHIFT = 3, - GX_VAT_MASK = 0x07 -}; +constexpr u8 GX_PRIMITIVE_MASK = 0x78; +constexpr u32 GX_PRIMITIVE_SHIFT = 3; +constexpr u8 GX_VAT_MASK = 0x07; // These values are the values extracted using GX_PRIMITIVE_MASK // and GX_PRIMITIVE_SHIFT. // GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS. -enum +enum class Primitive : u8 { GX_DRAW_QUADS = 0x0, // 0x80 GX_DRAW_QUADS_2 = 0x1, // 0x88 @@ -54,7 +63,232 @@ enum void Init(); +// Interface for the Run and RunCommand functions below. +// The functions themselves are templates so that the compiler generates separate versions for each +// callback (with the callback functions inlined), so the callback doesn't actually need to be +// publicly inherited. +// Compilers don't generate warnings for failed inlining with virtual functions, so this define +// allows disabling the use of virtual functions to generate those warnings. However, this means +// that missing functions will generate errors on their use in RunCommand, instead of in the +// subclass, which can be confusing. +#define OPCODE_CALLBACK_USE_INHERITANCE + +#ifdef OPCODE_CALLBACK_USE_INHERITANCE +#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override +#define OPCODE_CALLBACK_NOINLINE(sig) sig override +#else +#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig +#define OPCODE_CALLBACK_NOINLINE(sig) sig +#endif +class Callback +{ +#ifdef OPCODE_CALLBACK_USE_INHERITANCE +public: + virtual ~Callback() = default; + + // Called on any XF command. + virtual void OnXF(u16 address, u8 count, const u8* data) = 0; + // Called on any CP command. 
+ // Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that + // primitive commands decode properly. + virtual void OnCP(u8 command, u32 value) = 0; + // Called on any BP command. + virtual void OnBP(u8 command, u32 value) = 0; + // Called on any indexed XF load command. + virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0; + // Called on any primitive command. + virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size, + u16 num_vertices, const u8* vertex_data) = 0; + // Called on a display list. + virtual void OnDisplayList(u32 address, u32 size) = 0; + // Called on any NOP commands (which are all merged into a single call). + virtual void OnNop(u32 count) = 0; + // Called on an unknown opcode, or an opcode that is known but not implemented. + // data[0] is opcode. + virtual void OnUnknown(u8 opcode, const u8* data) = 0; + + // Called on ANY command. The first byte of data is the opcode. Size will be at least 1. + // This function is called after one of the above functions is called. + virtual void OnCommand(const u8* data, u32 size) = 0; + + // Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands. + virtual CPState& GetCPState() = 0; +#endif +}; + +namespace detail +{ +// Main logic; split so that the main RunCommand can call OnCommand with the returned size. 
+template >> +static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback) +{ + if (available < 1) + return 0; + + const Opcode cmd = static_cast(data[0]); + + switch (cmd) + { + case Opcode::GX_NOP: + { + u32 count = 1; + while (count < available && static_cast(data[count]) == Opcode::GX_NOP) + count++; + callback.OnNop(count); + return count; + } + + case Opcode::GX_LOAD_CP_REG: + { + if (available < 6) + return 0; + + const u8 cmd2 = data[1]; + const u32 value = Common::swap32(&data[2]); + + callback.OnCP(cmd2, value); + + return 6; + } + + case Opcode::GX_LOAD_XF_REG: + { + if (available < 5) + return 0; + + const u32 cmd2 = Common::swap32(&data[1]); + const u16 base_address = cmd2 & 0xffff; + + const u16 stream_size_temp = cmd2 >> 16; + ASSERT(stream_size_temp < 16); + const u8 stream_size = (stream_size_temp & 0xf) + 1; + + if (available < u32(5 + stream_size * 4)) + return 0; + + callback.OnXF(base_address, stream_size, &data[5]); + + return 5 + stream_size * 4; + } + + case Opcode::GX_LOAD_INDX_A: // Used for position matrices + case Opcode::GX_LOAD_INDX_B: // Used for normal matrices + case Opcode::GX_LOAD_INDX_C: // Used for postmatrices + case Opcode::GX_LOAD_INDX_D: // Used for lights + { + if (available < 5) + return 0; + + const u32 value = Common::swap32(&data[1]); + + const u32 index = value >> 16; + const u16 address = value & 0xFFF; // TODO: check mask + const u8 size = ((value >> 12) & 0xF) + 1; + + // Map the command byte to its ref array. + // GX_LOAD_INDX_A (32 = 8*4) . CPArray::XF_A (4+8 = 12) + // GX_LOAD_INDX_B (40 = 8*5) . CPArray::XF_B (5+8 = 13) + // GX_LOAD_INDX_C (48 = 8*6) . CPArray::XF_C (6+8 = 14) + // GX_LOAD_INDX_D (56 = 8*7) . 
CPArray::XF_D (7+8 = 15) + const auto ref_array = static_cast((static_cast(cmd) / 8) + 8); + + callback.OnIndexedLoad(ref_array, index, address, size); + return 5; + } + + case Opcode::GX_CMD_CALL_DL: + { + if (available < 9) + return 0; + + const u32 address = Common::swap32(&data[1]); + const u32 size = Common::swap32(&data[5]); + + callback.OnDisplayList(address, size); + return 9; + } + + case Opcode::GX_LOAD_BP_REG: + { + if (available < 5) + return 0; + + const u8 cmd2 = data[1]; + const u32 value = Common::swap24(&data[2]); + + callback.OnBP(cmd2, value); + + return 5; + } + + default: + if (cmd >= Opcode::GX_PRIMITIVE_START && cmd <= Opcode::GX_PRIMITIVE_END) + { + if (available < 3) + return 0; + + const u8 cmdbyte = static_cast(cmd); + const OpcodeDecoder::Primitive primitive = static_cast( + (cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT); + const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK; + + const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc, + callback.GetCPState().vtx_attr[vat]); + const u16 num_vertices = Common::swap16(&data[1]); + + if (available < 3 + num_vertices * vertex_size) + return 0; + + callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[3]); + + return 3 + num_vertices * vertex_size; + } + } + + callback.OnUnknown(static_cast(cmd), data); + return 1; +} +} // namespace detail + +template >> +DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback) +{ + const u32 size = detail::RunCommand(data, available, callback); + if (size > 0) + { + callback.OnCommand(data, size); + } + return size; +} + +template >> +DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback) +{ + u32 size = 0; + while (size < available) + { + const u32 command_size = RunCommand(&data[size], available - size, callback); + if (command_size == 0) + break; + size += command_size; + } + return size; +} + template -u8* Run(DataReader 
src, u32* cycles, bool in_display_list); +u8* RunFifo(DataReader src, u32* cycles); } // namespace OpcodeDecoder + +template <> +struct fmt::formatter + : EnumFormatter +{ + static constexpr array_type names = { + "GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)", + "GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP", + "GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES", + "GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS", + }; + formatter() : EnumFormatter(names) {} +}; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6261f57812..6c12a9607a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -8,6 +8,7 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/BoundingBox.h" @@ -40,7 +41,7 @@ enum : u32 C_PENVCONST_END = C_EFBSCALE + 1 }; -constexpr std::array tev_ksel_table_c{ +constexpr Common::EnumMap tev_ksel_table_c{ "255,255,255", // 1 = 0x00 "223,223,223", // 7_8 = 0x01 "191,191,191", // 3_4 = 0x02 @@ -75,7 +76,7 @@ constexpr std::array tev_ksel_table_c{ I_KCOLORS "[3].aaa", // K3_A = 0x1F }; -constexpr std::array tev_ksel_table_a{ +constexpr Common::EnumMap tev_ksel_table_a{ "255", // 1 = 0x00 "223", // 7_8 = 0x01 "191", // 3_4 = 0x02 @@ -110,7 +111,7 @@ constexpr std::array tev_ksel_table_a{ I_KCOLORS "[3].a", // K3_A = 0x1F }; -constexpr std::array tev_c_input_table{ +constexpr Common::EnumMap tev_c_input_table{ "prev.rgb", // CPREV, "prev.aaa", // APREV, "c0.rgb", // C0, @@ -129,7 +130,7 @@ constexpr std::array tev_c_input_table{ "int3(0,0,0)", // ZERO }; -constexpr std::array tev_a_input_table{ +constexpr Common::EnumMap tev_a_input_table{ "prev.a", // APREV, "c0.a", // A0, "c1.a", // A1, @@ -140,7 +141,7 @@ constexpr std::array tev_a_input_table{ "0", // ZERO }; -constexpr std::array tev_ras_table{ +constexpr Common::EnumMap tev_ras_table{ 
"iround(col0 * 255.0)", "iround(col1 * 255.0)", "ERROR13", // 2 @@ -151,14 +152,14 @@ constexpr std::array tev_ras_table{ "int4(0, 0, 0, 0)", // zero }; -constexpr std::array tev_c_output_table{ +constexpr Common::EnumMap tev_c_output_table{ "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb", }; -constexpr std::array tev_a_output_table{ +constexpr Common::EnumMap tev_a_output_table{ "prev.a", "c0.a", "c1.a", @@ -1160,11 +1161,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; if (last_cc.dest != TevOutput::Prev) { - out.Write("\tprev.rgb = {};\n", tev_c_output_table[u32(last_cc.dest.Value())]); + out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); } if (last_ac.dest != TevOutput::Prev) { - out.Write("\tprev.a = {};\n", tev_a_output_table[u32(last_ac.dest.Value())]); + out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); } } out.Write("\tprev = prev & 255;\n"); @@ -1277,6 +1278,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, APIType api_type, bool stereo) { + using Common::EnumMap; + const auto& stage = uid_data->stagehash[n]; out.Write("\n\t// TEV stage {}\n", n); @@ -1303,7 +1306,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // using iindtex{} as the offset coords if (has_ind_stage && tevind.bs != IndTexBumpAlpha::Off) { - static constexpr std::array tev_ind_alpha_sel{ + static constexpr EnumMap tev_ind_alpha_sel{ "", "x", "y", @@ -1316,16 +1319,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L3038-L3041 // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L790-L800 - static constexpr std::array 
tev_ind_alpha_shift{ + static constexpr EnumMap tev_ind_alpha_shift{ '0', // ITF_8: 0bXXXXXYYY -> 0bXXXXX000? No shift? '5', // ITF_5: 0bIIIIIAAA -> 0bAAA00000, shift of 5 '4', // ITF_4: 0bIIIIAAAA -> 0bAAAA0000, shift of 4 '3', // ITF_3: 0bIIIAAAAA -> 0bAAAAA000, shift of 3 }; - out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt.Value(), - tev_ind_alpha_sel[u32(tevind.bs.Value())], - tev_ind_alpha_shift[u32(tevind.fmt.Value())]); + out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt, + tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_shift[tevind.fmt]); } else { @@ -1335,23 +1337,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (has_ind_stage && tevind.matrix_index != IndMtxIndex::Off) { // format - static constexpr std::array tev_ind_fmt_shift{ + static constexpr EnumMap tev_ind_fmt_shift{ '0', // ITF_8: 0bXXXXXXXX -> 0bXXXXXXXX, no shift '3', // ITF_5: 0bIIIIIAAA -> 0b000IIIII, shift of 3 '4', // ITF_4: 0bIIIIAAAA -> 0b0000IIII, shift of 4 '5', // ITF_3: 0bIIIAAAAA -> 0b00000III, shift of 5 }; - out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt.Value(), - tev_ind_fmt_shift[u32(tevind.fmt.Value())]); + out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt, + tev_ind_fmt_shift[tevind.fmt]); // bias - TODO: Check if this needs to be this complicated... 
// indexed by bias - static constexpr std::array tev_ind_bias_field{ + static constexpr EnumMap tev_ind_bias_field{ "", "x", "y", "xy", "z", "xz", "yz", "xyz", }; // indexed by fmt - static constexpr std::array tev_ind_bias_add{ + static constexpr EnumMap tev_ind_bias_add{ "-128", "1", "1", @@ -1361,22 +1363,19 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (tevind.bias == IndTexBias::S || tevind.bias == IndTexBias::T || tevind.bias == IndTexBias::U) { - out.Write("\tiindtevcrd{}.{} += int({});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == IndTexBias::ST || tevind.bias == IndTexBias::SU || tevind.bias == IndTexBias::TU_) { - out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == IndTexBias::STU) { out.Write("\tiindtevcrd{0}.{1} += int3({2}, {2}, {2});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + tev_ind_bias_field[tevind.bias], tev_ind_bias_add[tevind.fmt]); } // Multiplied by 2 because each matrix has two rows. 
@@ -1535,7 +1534,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i '\0', }; - out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap); + out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap); } if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0) @@ -1567,8 +1566,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i cc.d == TevColorArg::Konst || ac.a == TevAlphaArg::Konst || ac.b == TevAlphaArg::Konst || ac.c == TevAlphaArg::Konst || ac.d == TevAlphaArg::Konst) { - out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[u32(stage.tevksel_kc)], - tev_ksel_table_a[u32(stage.tevksel_ka)]); + out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc], + tev_ksel_table_a[stage.tevksel_ka]); if (u32(stage.tevksel_kc) > 7) { @@ -1599,51 +1598,50 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VECTOR_BITWISE_AND)) { - out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.a.Value())], - tev_a_input_table[u32(ac.a.Value())]); - out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.b.Value())], - tev_a_input_table[u32(ac.b.Value())]); - out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.c.Value())], - tev_a_input_table[u32(ac.c.Value())]); + out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.a], + tev_a_input_table[ac.a]); + out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.b], + tev_a_input_table[ac.b]); + out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.c], + tev_a_input_table[ac.c]); } else { - out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.a.Value())], tev_a_input_table[u32(ac.a.Value())]); - out.Write("\ttevin_b = int4({}, 
{})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.b.Value())], tev_a_input_table[u32(ac.b.Value())]); - out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.c.Value())], tev_a_input_table[u32(ac.c.Value())]); + out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a], + tev_a_input_table[ac.a]); + out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b], + tev_a_input_table[ac.b]); + out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c], + tev_a_input_table[ac.c]); } - out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[u32(cc.d.Value())], - tev_a_input_table[u32(ac.d.Value())]); + out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]); out.Write("\t// color combine\n"); - out.Write("\t{} = clamp(", tev_c_output_table[u32(cc.dest.Value())]); + out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]); if (cc.bias != TevBias::Compare) { WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false); } else { - static constexpr std::array function_table{ - "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8, GT - "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // R8, TevComparison::EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : " - "int3(0,0,0))", // GR16, GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : " - "int3(0,0,0))", // GR16, EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : " - "int3(0,0,0))", // BGR24, GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? 
tevin_c.rgb : " - "int3(0,0,0))", // BGR24, EQ - "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8, GT - "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8, EQ + static constexpr EnumMap tev_rgb_comparison_gt{ + "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16 + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24 + "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8 }; - const u32 mode = (u32(cc.compare_mode.Value()) << 1) | u32(cc.comparison.Value()); - out.Write(" tevin_d.rgb + "); - out.Write("{}", function_table[mode]); + static constexpr EnumMap tev_rgb_comparison_eq{ + "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0))", // TevCompareMode::R8 + "((idot(tevin_a.rgb,comp16) == idot(tevin_b.rgb,comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16 + "((idot(tevin_a.rgb,comp24) == idot(tevin_b.rgb,comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24 + "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8 + }; + + if (cc.comparison == TevComparison::EQ) + out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_eq[cc.compare_mode]); + else + out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_gt[cc.compare_mode]); } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); @@ -1652,27 +1650,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write(";\n"); out.Write("\t// alpha combine\n"); - out.Write("\t{} = clamp(", tev_a_output_table[u32(ac.dest.Value())]); + out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]); if (ac.bias != TevBias::Compare) { WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true); } else { - static constexpr std::array function_table{ - "((tevin_a.r > tevin_b.r) ? 
tevin_c.a : 0)", // TevCompareMode::R8, GT - "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // R8, TevComparison::EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, EQ - "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8, GT - "((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // A8, EQ + static constexpr EnumMap tev_a_comparison_gt{ + "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16 + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24 + "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8 }; - const u32 mode = (u32(ac.compare_mode.Value()) << 1) | u32(ac.comparison.Value()); - out.Write(" tevin_d.a + "); - out.Write("{}", function_table[mode]); + static constexpr EnumMap tev_a_comparison_eq{ + "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, + "((tevin_a.a == tevin_b.a) ? 
tevin_c.a : 0)", // A8 + }; + + if (ac.comparison == TevComparison::EQ) + out.Write(" tevin_d.a + {}", tev_a_comparison_eq[ac.compare_mode]); + else + out.Write(" tevin_d.a + {}", tev_a_comparison_gt[ac.compare_mode]); } if (ac.clamp) out.Write(", 0, 255)"); @@ -1685,36 +1687,33 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, bool clamp, TevScale scale, bool alpha) { - static constexpr std::array tev_scale_table_left{ + static constexpr Common::EnumMap tev_scale_table_left{ "", // Scale1 " << 1", // Scale2 " << 2", // Scale4 "", // Divide2 }; - static constexpr std::array tev_scale_table_right{ + static constexpr Common::EnumMap tev_scale_table_right{ "", // Scale1 "", // Scale2 "", // Scale4 " >> 1", // Divide2 }; - // indexed by 2*op+(scale==Divide2) - static constexpr std::array tev_lerp_bias{ - "", + static constexpr Common::EnumMap tev_lerp_bias{ " + 128", - "", " + 127", }; - static constexpr std::array tev_bias_table{ + static constexpr Common::EnumMap tev_bias_table{ "", // Zero, " + 128", // AddHalf, " - 128", // SubHalf, "", }; - static constexpr std::array tev_op_table{ + static constexpr Common::EnumMap tev_op_table{ '+', // TevOp::Add = 0, '-', // TevOp::Sub = 1, }; @@ -1724,17 +1723,16 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - a rounding bias is added before dividing by 256 - out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[u32(bias)], - tev_scale_table_left[u32(scale)]); - out.Write(" {} ", tev_op_table[u32(op)]); - out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)", - components, components, components, components, components, 
- tev_scale_table_left[u32(scale)], - tev_lerp_bias[2 * u32(op) + ((scale == TevScale::Divide2) == alpha)]); - out.Write("){}", tev_scale_table_right[u32(scale)]); + out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[scale]); + out.Write(" {} ", tev_op_table[op]); + out.Write("(((((tevin_a.{0}<<8) + " + "(tevin_b.{0}-tevin_a.{0})*(tevin_c.{0}+(tevin_c.{0}>>7))){1}){2})>>8)", + components, tev_scale_table_left[scale], + ((scale == TevScale::Divide2) == alpha) ? tev_lerp_bias[op] : ""); + out.Write("){}", tev_scale_table_right[scale]); } -constexpr std::array tev_alpha_funcs_table{ +constexpr Common::EnumMap tev_alpha_funcs_table{ "(false)", // CompareMode::Never "(prev.a < {})", // CompareMode::Less "(prev.a == {})", // CompareMode::Equal @@ -1745,7 +1743,7 @@ constexpr std::array tev_alpha_funcs_table{ "(true)" // CompareMode::Always }; -constexpr std::array tev_alpha_funclogic_table{ +constexpr Common::EnumMap tev_alpha_funclogic_table{ " && ", // and " || ", // or " != ", // xor @@ -1763,9 +1761,9 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat const auto write_alpha_func = [&out](CompareMode mode, std::string_view ref) { const bool has_no_arguments = mode == CompareMode::Never || mode == CompareMode::Always; if (has_no_arguments) - out.Write("{}", tev_alpha_funcs_table[u32(mode)]); + out.Write("{}", tev_alpha_funcs_table[mode]); else - out.Write(tev_alpha_funcs_table[u32(mode)], ref); + out.Write(tev_alpha_funcs_table[mode], ref); }; out.SetConstantsUsed(C_ALPHA, C_ALPHA); @@ -1779,7 +1777,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat write_alpha_func(uid_data->alpha_test_comp0, alpha_ref[0]); // Lookup the logic op - out.Write("{}", tev_alpha_funclogic_table[u32(uid_data->alpha_test_logic)]); + out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]); // Lookup the second component from the alpha function table 
write_alpha_func(uid_data->alpha_test_comp1, alpha_ref[1]); @@ -1809,7 +1807,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat out.Write("\t}}\n"); } -constexpr std::array tev_fog_funcs_table{ +constexpr Common::EnumMap tev_fog_funcs_table{ "", // No Fog "", // ? "", // Linear @@ -1866,7 +1864,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) if (uid_data->fog_fsel >= FogType::Exp) { - out.Write("{}", tev_fog_funcs_table[u32(uid_data->fog_fsel)]); + out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]); } else { @@ -1919,7 +1917,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) { if (uid_data->blend_enable) { - static constexpr std::array blend_src_factor{ + using Common::EnumMap; + static constexpr EnumMap blend_src_factor{ "float3(0,0,0);", // ZERO "float3(1,1,1);", // ONE "initial_ocol0.rgb;", // DSTCLR @@ -1929,7 +1928,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.aaa;", // DSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA }; - static constexpr std::array blend_src_factor_alpha{ + static constexpr EnumMap blend_src_factor_alpha{ "0.0;", // ZERO "1.0;", // ONE "initial_ocol0.a;", // DSTCLR @@ -1939,7 +1938,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.a;", // DSTALPHA "1.0 - initial_ocol0.a;", // INVDSTALPHA }; - static constexpr std::array blend_dst_factor{ + static constexpr EnumMap blend_dst_factor{ "float3(0,0,0);", // ZERO "float3(1,1,1);", // ONE "ocol0.rgb;", // SRCCLR @@ -1949,7 +1948,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.aaa;", // DSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA }; - static constexpr std::array blend_dst_factor_alpha{ + static constexpr EnumMap blend_dst_factor_alpha{ "0.0;", // ZERO "1.0;", // ONE "ocol0.a;", // SRCCLR @@ -1960,13 +1959,11 @@ 
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "1.0 - initial_ocol0.a;", // INVDSTALPHA }; out.Write("\tfloat4 blend_src;\n"); - out.Write("\tblend_src.rgb = {}\n", blend_src_factor[u32(uid_data->blend_src_factor)]); - out.Write("\tblend_src.a = {}\n", - blend_src_factor_alpha[u32(uid_data->blend_src_factor_alpha)]); + out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]); + out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]); out.Write("\tfloat4 blend_dst;\n"); - out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[u32(uid_data->blend_dst_factor)]); - out.Write("\tblend_dst.a = {}\n", - blend_dst_factor_alpha[u32(uid_data->blend_dst_factor_alpha)]); + out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]); + out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]); out.Write("\tfloat4 blend_result;\n"); if (uid_data->blend_subtract) diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index e606a464e6..31cfd36aee 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -963,7 +963,7 @@ void Renderer::RecordVideoMemory() const u32* xfregs_ptr = reinterpret_cast(&xfmem) + FifoDataFile::XF_MEM_SIZE; u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE; - FillCPMemoryArray(cpmem); + g_main_cp_state.FillCPMemoryArray(cpmem); FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size, texMem); @@ -986,9 +986,9 @@ bool Renderer::InitializeImGui() ImGui::GetStyle().WindowRounding = 7.0f; PortableVertexDeclaration vdecl = {}; - vdecl.position = {VAR_FLOAT, 2, offsetof(ImDrawVert, pos), true, false}; - vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false}; - vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false}; + vdecl.position = 
{ComponentFormat::Float, 2, offsetof(ImDrawVert, pos), true, false}; + vdecl.texcoords[0] = {ComponentFormat::Float, 2, offsetof(ImDrawVert, uv), true, false}; + vdecl.colors[0] = {ComponentFormat::UByte, 4, offsetof(ImDrawVert, col), true, false}; vdecl.stride = sizeof(ImDrawVert); m_imgui_vertex_format = CreateNativeVertexFormat(vdecl); if (!m_imgui_vertex_format) diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 8b1196cabc..b72dd238ff 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -1095,7 +1095,7 @@ void ShaderCache::QueueUberShaderPipelines() // All attributes will be enabled in GetUberVertexFormat. PortableVertexDeclaration dummy_vertex_decl = {}; dummy_vertex_decl.position.components = 4; - dummy_vertex_decl.position.type = VAR_FLOAT; + dummy_vertex_decl.position.type = ComponentFormat::Float; dummy_vertex_decl.position.enable = true; dummy_vertex_decl.stride = sizeof(float) * 4; NativeVertexFormat* dummy_vertex_format = diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 367a472294..88a303356b 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -13,10 +13,11 @@ #include "Common/BitField.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/StringUtil.h" #include "Common/TypeUtils.h" -enum class APIType; +#include "VideoCommon/VideoCommon.h" /** * Common interface for classes that need to go through the shader generation path @@ -210,6 +211,64 @@ std::string BitfieldExtract(std::string_view source) static_cast(BitFieldT::NumBits())); } +template +void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable, + const Common::EnumMap& values, int indent, + bool break_) +{ + const bool make_switch = (ApiType == APIType::D3D); + + // The second template argument is needed to avoid compile errors from ambiguity 
with multiple + // enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW + // and https://godbolt.org/z/hz7Yqq1P5 + using enum_type = decltype(last_member); + + // {:{}} is used to indent by formatting an empty string with a variable width + if (make_switch) + { + out.Write("{:{}}switch ({}) {{\n", "", indent, variable); + for (u32 i = 0; i <= static_cast(last_member); i++) + { + const enum_type key = static_cast(i); + + // Assumes existence of an EnumFormatter + out.Write("{:{}}case {:s}:\n", "", indent, key); + // Note that this indentation behaves poorly for multi-line code + if (!values[key].empty()) + out.Write("{:{}} {}\n", "", indent, values[key]); + if (break_) + out.Write("{:{}} break;\n", "", indent); + } + out.Write("{:{}}}}\n", "", indent); + } + else + { + // Generate a tree of if statements recursively + // std::function must be used because auto won't capture before initialization and thus can't be + // used recursively + std::function BuildTree = [&](u32 cur_indent, u32 low, u32 high) { + // Each generated statement is for low <= x < high + if (high == low + 1) + { + // Down to 1 case (low <= x < low + 1 means x == low) + const enum_type key = static_cast(low); + // Note that this indentation behaves poorly for multi-line code + out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key); + } + else + { + u32 mid = low + ((high - low) / 2); + out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid); + BuildTree(cur_indent + 2, low, mid); + out.Write("{:{}}}} else {{\n", "", cur_indent); + BuildTree(cur_indent + 2, mid, high); + out.Write("{:{}}}}\n", "", cur_indent); + } + }; + BuildTree(indent, 0, static_cast(last_member) + 1); + } +} + // Constant variable names #define I_COLORS "color" #define I_KCOLORS "k" diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index c774b6d2d1..3041e30473 100644 --- 
a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1300,42 +1300,30 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur // Search the texture cache for textures by address // // Find all texture cache entries for the current texture address, and decide whether to use one - // of - // them, or to create a new one + // of them, or to create a new one // // In most cases, the fastest way is to use only one texture cache entry for the same address. - // Usually, - // when a texture changes, the old version of the texture is unlikely to be used again. If there - // were - // new cache entries created for normal texture updates, there would be a slowdown due to a huge - // amount - // of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is - // faster than creating a new one from scratch. + // Usually, when a texture changes, the old version of the texture is unlikely to be used again. + // If there were new cache entries created for normal texture updates, there would be a slowdown + // due to a huge amount of unused cache entries. Also thanks to texture pooling, overwriting an + // existing cache entry is faster than creating a new one from scratch. // // Some games use the same address for different textures though. If the same cache entry was used - // in - // this case, it would be constantly overwritten, and effectively there wouldn't be any caching - // for - // those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has - // multiple - // sets of fonts on each other stored in a single texture and uses the palette to make different - // characters visible or invisible. In Castlevania 3 some textures are used for 2 different things - // or - // at least in 2 different ways(size 1024x1024 vs 1024x256). 
+ // in this case, it would be constantly overwritten, and effectively there wouldn't be any caching + // for those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has + // multiple sets of fonts on each other stored in a single texture and uses the palette to make + // different characters visible or invisible. In Castlevania 3 some textures are used for 2 + // different things or at least in 2 different ways (size 1024x1024 vs 1024x256). // // To determine whether to use multiple cache entries or a single entry, use the following - // heuristic: - // If the same texture address is used several times during the same frame, assume the address is - // used - // for different purposes and allow creating an additional cache entry. If there's at least one - // entry - // that hasn't been used for the same frame, then overwrite it, in order to keep the cache as - // small as - // possible. If the current texture is found in the cache, use that entry. + // heuristic: If the same texture address is used several times during the same frame, assume the + // address is used for different purposes and allow creating an additional cache entry. If there's + // at least one entry that hasn't been used for the same frame, then overwrite it, in order to + // keep the cache as small as possible. If the current texture is found in the cache, use that + // entry. // // For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else - // it was - // done in vain. + // it was done in vain. 
auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress()); TexAddrCache::iterator iter = iter_range.first; TexAddrCache::iterator oldest_entry = iter; diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 36fc6addc1..c7719ec377 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -404,263 +404,95 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "int4 getKonstColor(State s, StageState ss);\n" "\n"); - // The switch statements in these functions appear to get transformed into an if..else chain - // on NVIDIA's OpenGL/Vulkan drivers, resulting in lower performance than the D3D counterparts. - // Transforming the switch into a binary tree of ifs can increase performance by up to 20%. - if (api_type == APIType::D3D) - { - out.Write("// Helper function for Alpha Test\n" - "bool alphaCompare(int a, int b, uint compare) {{\n" - " switch (compare) {{\n" - " case 0u: // NEVER\n" - " return false;\n" - " case 1u: // LESS\n" - " return a < b;\n" - " case 2u: // EQUAL\n" - " return a == b;\n" - " case 3u: // LEQUAL\n" - " return a <= b;\n" - " case 4u: // GREATER\n" - " return a > b;\n" - " case 5u: // NEQUAL;\n" - " return a != b;\n" - " case 6u: // GEQUAL\n" - " return a >= b;\n" - " case 7u: // ALWAYS\n" - " return true;\n" - " }}\n" - "}}\n" - "\n" - "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.rgb\n" - " return s.Reg[0].rgb;\n" - " case 1u: // prev.aaa\n" - " return s.Reg[0].aaa;\n" - " case 2u: // c0.rgb\n" - " return s.Reg[1].rgb;\n" - " case 3u: // c0.aaa\n" - " return s.Reg[1].aaa;\n" - " case 4u: // c1.rgb\n" - " return s.Reg[2].rgb;\n" - " case 5u: // c1.aaa\n" - " return s.Reg[2].aaa;\n" - " case 6u: // c2.rgb\n" - " return s.Reg[3].rgb;\n" - " case 7u: // c2.aaa\n" - " return s.Reg[3].aaa;\n" - " 
case 8u:\n" - " return s.TexColor.rgb;\n" - " case 9u:\n" - " return s.TexColor.aaa;\n" - " case 10u:\n" - " return getRasColor(s, ss, colors_0, colors_1).rgb;\n" - " case 11u:\n" - " return getRasColor(s, ss, colors_0, colors_1).aaa;\n" - " case 12u: // One\n" - " return int3(255, 255, 255);\n" - " case 13u: // Half\n" - " return int3(128, 128, 128);\n" - " case 14u:\n" - " return getKonstColor(s, ss).rgb;\n" - " case 15u: // Zero\n" - " return int3(0, 0, 0);\n" - " }}\n" - "}}\n" - "\n" - "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.a\n" - " return s.Reg[0].a;\n" - " case 1u: // c0.a\n" - " return s.Reg[1].a;\n" - " case 2u: // c1.a\n" - " return s.Reg[2].a;\n" - " case 3u: // c2.a\n" - " return s.Reg[3].a;\n" - " case 4u:\n" - " return s.TexColor.a;\n" - " case 5u:\n" - " return getRasColor(s, ss, colors_0, colors_1).a;\n" - " case 6u:\n" - " return getKonstColor(s, ss).a;\n" - " case 7u: // Zero\n" - " return 0;\n" - " }}\n" - "}}\n" - "\n" - "int4 getTevReg(in State s, uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " return s.Reg[0];\n" - " case 1u: // c0\n" - " return s.Reg[1];\n" - " case 2u: // c1\n" - " return s.Reg[2];\n" - " case 3u: // c2\n" - " return s.Reg[3];\n" - " default: // prev\n" - " return s.Reg[0];\n" - " }}\n" - "}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].rgb = color;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].rgb = color;\n" - " break;\n" - " case 2u: // c1\n" - " s.Reg[2].rgb = color;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].rgb = color;\n" - " break;\n" - " }}\n" - "}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].a = alpha;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].a = alpha;\n" - " break;\n" - " case 
2u: // c1\n" - " s.Reg[2].a = alpha;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].a = alpha;\n" - " break;\n" - " }}\n" - "}}\n" - "\n"); - } - else - { - out.Write( - "// Helper function for Alpha Test\n" - "bool alphaCompare(int a, int b, uint compare) {{\n" - " if (compare < 4u) {{\n" - " if (compare < 2u) {{\n" - " return (compare == 0u) ? (false) : (a < b);\n" - " }} else {{\n" - " return (compare == 2u) ? (a == b) : (a <= b);\n" - " }}\n" - " }} else {{\n" - " if (compare < 6u) {{\n" - " return (compare == 4u) ? (a > b) : (a != b);\n" - " }} else {{\n" - " return (compare == 6u) ? (a >= b) : (true);\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " if (index < 8u) {{\n" - " if (index < 4u) {{\n" - " if (index < 2u) {{\n" - " return (index == 0u) ? s.Reg[0].rgb : s.Reg[0].aaa;\n" - " }} else {{\n" - " return (index == 2u) ? s.Reg[1].rgb : s.Reg[1].aaa;\n" - " }}\n" - " }} else {{\n" - " if (index < 6u) {{\n" - " return (index == 4u) ? s.Reg[2].rgb : s.Reg[2].aaa;\n" - " }} else {{\n" - " return (index == 6u) ? s.Reg[3].rgb : s.Reg[3].aaa;\n" - " }}\n" - " }}\n" - " }} else {{\n" - " if (index < 12u) {{\n" - " if (index < 10u) {{\n" - " return (index == 8u) ? s.TexColor.rgb : s.TexColor.aaa;\n" - " }} else {{\n" - " int4 ras = getRasColor(s, ss, colors_0, colors_1);\n" - " return (index == 10u) ? ras.rgb : ras.aaa;\n" - " }}\n" - " }} else {{\n" - " if (index < 14u) {{\n" - " return (index == 12u) ? int3(255, 255, 255) : int3(128, 128, 128);\n" - " }} else {{\n" - " return (index == 14u) ? getKonstColor(s, ss).rgb : int3(0, 0, 0);\n" - " }}\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " if (index < 4u) {{\n" - " if (index < 2u) {{\n" - " return (index == 0u) ? s.Reg[0].a : s.Reg[1].a;\n" - " }} else {{\n" - " return (index == 2u) ? 
s.Reg[2].a : s.Reg[3].a;\n" - " }}\n" - " }} else {{\n" - " if (index < 6u) {{\n" - " return (index == 4u) ? s.TexColor.a : getRasColor(s, ss, colors_0, colors_1).a;\n" - " }} else {{\n" - " return (index == 6u) ? getKonstColor(s, ss).a : 0;\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int4 getTevReg(in State s, uint index) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " return s.Reg[0];\n" - " }} else {{\n" - " return s.Reg[1];\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " return s.Reg[2];\n" - " }} else {{\n" - " return s.Reg[3];\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " s.Reg[0].rgb = color;\n" - " }} else {{\n" - " s.Reg[1].rgb = color;\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " s.Reg[2].rgb = color;\n" - " }} else {{\n" - " s.Reg[3].rgb = color;\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " s.Reg[0].a = alpha;\n" - " }} else {{\n" - " s.Reg[1].a = alpha;\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " s.Reg[2].a = alpha;\n" - " }} else {{\n" - " s.Reg[3].a = alpha;\n" - " }}\n" - " }}\n" - "}}\n" - "\n"); - } + static constexpr Common::EnumMap tev_alpha_funcs_table{ + "return false;", // CompareMode::Never + "return a < b;", // CompareMode::Less + "return a == b;", // CompareMode::Equal + "return a <= b;", // CompareMode::LEqual + "return a > b;", // CompareMode::Greater + "return a != b;", // CompareMode::NEqual + "return a >= b;", // CompareMode::GEqual + "return true;" // CompareMode::Always + }; + + static constexpr Common::EnumMap tev_c_input_table{ + "return s.Reg[0].rgb;", // CPREV, + "return s.Reg[0].aaa;", // APREV, + "return s.Reg[1].rgb;", // C0, + "return s.Reg[1].aaa;", // A0, + "return s.Reg[2].rgb;", // C1, + "return s.Reg[2].aaa;", // A1, + "return 
s.Reg[3].rgb;", // C2, + "return s.Reg[3].aaa;", // A2, + "return s.TexColor.rgb;", // TEXC, + "return s.TexColor.aaa;", // TEXA, + "return getRasColor(s, ss, colors_0, colors_1).rgb;", // RASC, + "return getRasColor(s, ss, colors_0, colors_1).aaa;", // RASA, + "return int3(255, 255, 255);", // ONE + "return int3(128, 128, 128);", // HALF + "return getKonstColor(s, ss).rgb;", // KONST + "return int3(0, 0, 0);", // ZERO + }; + + static constexpr Common::EnumMap tev_a_input_table{ + "return s.Reg[0].a;", // APREV, + "return s.Reg[1].a;", // A0, + "return s.Reg[2].a;", // A1, + "return s.Reg[3].a;", // A2, + "return s.TexColor.a;", // TEXA, + "return getRasColor(s, ss, colors_0, colors_1).a;", // RASA, + "return getKonstColor(s, ss).a;", // KONST, (hw1 had quarter) + "return 0;", // ZERO + }; + + static constexpr Common::EnumMap tev_regs_lookup_table{ + "return s.Reg[0];", + "return s.Reg[1];", + "return s.Reg[2];", + "return s.Reg[3];", + }; + + static constexpr Common::EnumMap tev_c_set_table{ + "s.Reg[0].rgb = color;", + "s.Reg[1].rgb = color;", + "s.Reg[2].rgb = color;", + "s.Reg[3].rgb = color;", + }; + + static constexpr Common::EnumMap tev_a_set_table{ + "s.Reg[0].a = alpha;", + "s.Reg[1].a = alpha;", + "s.Reg[2].a = alpha;", + "s.Reg[3].a = alpha;", + }; + + out.Write("// Helper function for Alpha Test\n" + "bool alphaCompare(int a, int b, uint compare) {{\n"); + WriteSwitch(out, api_type, "compare", tev_alpha_funcs_table, 2, false); + out.Write("}}\n" + "\n" + "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_c_input_table, 2, false); + out.Write("}}\n" + "\n" + "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_a_input_table, 2, false); + out.Write("}}\n" + "\n" + "int4 getTevReg(in State s, uint index) {{\n"); + WriteSwitch(out, api_type, "index", 
tev_regs_lookup_table, 2, false); + out.Write("}}\n" + "\n" + "void setRegColor(inout State s, uint index, int3 color) {{\n"); + WriteSwitch(out, api_type, "index", tev_c_set_table, 2, true); + out.Write("}}\n" + "\n" + "void setRegAlpha(inout State s, uint index, int alpha) {{\n"); + WriteSwitch(out, api_type, "index", tev_a_set_table, 2, true); + out.Write("}}\n" + "\n"); // Since the fixed-point texture coodinate variables aren't global, we need to pass // them to the select function. This applies to all backends. @@ -1284,78 +1116,59 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, if (use_shader_blend) { - static constexpr std::array blendSrcFactor{{ - "float3(0,0,0);", // ZERO - "float3(1,1,1);", // ONE - "initial_ocol0.rgb;", // DSTCLR - "float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR - "ocol1.aaa;", // SRCALPHA - "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA - }}; - static constexpr std::array blendSrcFactorAlpha{{ - "0.0;", // ZERO - "1.0;", // ONE - "initial_ocol0.a;", // DSTCLR - "1.0 - initial_ocol0.a;", // INVDSTCLR - "ocol1.a;", // SRCALPHA - "1.0 - ocol1.a;", // INVSRCALPHA - "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA - }}; - static constexpr std::array blendDstFactor{{ - "float3(0,0,0);", // ZERO - "float3(1,1,1);", // ONE - "ocol0.rgb;", // SRCCLR - "float3(1,1,1) - ocol0.rgb;", // INVSRCCLR - "ocol1.aaa;", // SRCALHA - "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA - }}; - static constexpr std::array blendDstFactorAlpha{{ - "0.0;", // ZERO - "1.0;", // ONE - "ocol0.a;", // SRCCLR - "1.0 - ocol0.a;", // INVSRCCLR - "ocol1.a;", // SRCALPHA - "1.0 - ocol1.a;", // INVSRCALPHA - "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA - }}; + using Common::EnumMap; + + static constexpr 
EnumMap blendSrcFactor{ + "blend_src.rgb = float3(0,0,0);", // ZERO + "blend_src.rgb = float3(1,1,1);", // ONE + "blend_src.rgb = initial_ocol0.rgb;", // DSTCLR + "blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR + "blend_src.rgb = ocol1.aaa;", // SRCALPHA + "blend_src.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "blend_src.rgb = initial_ocol0.aaa;", // DSTALPHA + "blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static constexpr EnumMap blendSrcFactorAlpha{ + "blend_src.a = 0.0;", // ZERO + "blend_src.a = 1.0;", // ONE + "blend_src.a = initial_ocol0.a;", // DSTCLR + "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTCLR + "blend_src.a = ocol1.a;", // SRCALPHA + "blend_src.a = 1.0 - ocol1.a;", // INVSRCALPHA + "blend_src.a = initial_ocol0.a;", // DSTALPHA + "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + static constexpr EnumMap blendDstFactor{ + "blend_dst.rgb = float3(0,0,0);", // ZERO + "blend_dst.rgb = float3(1,1,1);", // ONE + "blend_dst.rgb = ocol0.rgb;", // SRCCLR + "blend_dst.rgb = float3(1,1,1) - ocol0.rgb;", // INVSRCCLR + "blend_dst.rgb = ocol1.aaa;", // SRCALHA + "blend_dst.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "blend_dst.rgb = initial_ocol0.aaa;", // DSTALPHA + "blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static constexpr EnumMap blendDstFactorAlpha{ + "blend_dst.a = 0.0;", // ZERO + "blend_dst.a = 1.0;", // ONE + "blend_dst.a = ocol0.a;", // SRCCLR + "blend_dst.a = 1.0 - ocol0.a;", // INVSRCCLR + "blend_dst.a = ocol1.a;", // SRCALPHA + "blend_dst.a = 1.0 - ocol1.a;", // INVSRCALPHA + "blend_dst.a = initial_ocol0.a;", // DSTALPHA + "blend_dst.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA + }; out.Write(" if (blend_enable) {{\n" - " float4 blend_src;\n" - " switch (blend_src_factor) {{\n"); - for (size_t i = 0; i < blendSrcFactor.size(); i++) - { - out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]); - } + " float4 
blend_src;\n"); + WriteSwitch(out, api_type, "blend_src_factor", blendSrcFactor, 4, true); + WriteSwitch(out, api_type, "blend_src_factor_alpha", blendSrcFactorAlpha, 4, true); - out.Write(" }}\n" - " switch (blend_src_factor_alpha) {{\n"); - for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++) - { - out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]); - } - - out.Write(" }}\n" - " float4 blend_dst;\n" - " switch (blend_dst_factor) {{\n"); - for (size_t i = 0; i < blendDstFactor.size(); i++) - { - out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]); - } - out.Write(" }}\n" - " switch (blend_dst_factor_alpha) {{\n"); - for (size_t i = 0; i < blendDstFactorAlpha.size(); i++) - { - out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]); - } + out.Write(" float4 blend_dst;\n"); + WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true); + WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true); out.Write( - " }}\n" " float4 blend_result;\n" " if (blend_subtract)\n" " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n" diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index aaf7a477f7..69f669cd4c 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -91,7 +91,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = nat_offset; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; nat_offset += 4; } @@ -110,7 +110,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.position.components = pos_elements; m_native_vtx_decl.position.enable = true; m_native_vtx_decl.position.offset = nat_offset; - 
m_native_vtx_decl.position.type = VAR_FLOAT; + m_native_vtx_decl.position.type = ComponentFormat::Float; m_native_vtx_decl.position.integer = false; nat_offset += pos_elements * sizeof(float); @@ -134,7 +134,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.normals[i].components = 3; m_native_vtx_decl.normals[i].enable = true; m_native_vtx_decl.normals[i].offset = nat_offset; - m_native_vtx_decl.normals[i].type = VAR_FLOAT; + m_native_vtx_decl.normals[i].type = ComponentFormat::Float; m_native_vtx_decl.normals[i].integer = false; nat_offset += 12; } @@ -143,7 +143,7 @@ void VertexLoader::CompileVertexTranslator() for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) { m_native_vtx_decl.colors[i].components = 4; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; TPipelineFunction pFunc = @@ -166,7 +166,7 @@ void VertexLoader::CompileVertexTranslator() for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { m_native_vtx_decl.texcoords[i].offset = nat_offset; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; const auto tc = m_VtxDesc.high.TexCoord[i].Value(); diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 71fc9e054c..330deef548 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -6,6 +6,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoaderManager.h" @@ -59,7 +60,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at WriteProtect(); } -void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg) +void 
VertexLoaderARM64::GetVertexAddr(CPArray array, VertexComponentFormat attribute, ARM64Reg reg) { if (IsIndexed(attribute)) { @@ -95,7 +96,7 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute REV16(scratch1_reg, scratch1_reg); } - if (array == ARRAY_POSITION) + if (array == CPArray::Position) { EOR(scratch2_reg, scratch1_reg, attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) : @@ -103,17 +104,18 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute m_skip_vertex = CBZ(scratch2_reg); } - LDR(IndexType::Unsigned, scratch2_reg, stride_reg, array * 4); + LDR(IndexType::Unsigned, scratch2_reg, stride_reg, static_cast(array) * 4); MUL(scratch1_reg, scratch1_reg, scratch2_reg); - LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8); + LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, + static_cast(array) * 8); ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg)); } else ADD(reg, src_reg, m_src_ofs); } -s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute, +s32 VertexLoaderARM64::GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align) { if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1)))) @@ -219,7 +221,7 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm native_format->components = count_out; native_format->enable = true; native_format->offset = m_dst_ofs; - native_format->type = VAR_FLOAT; + native_format->type = ComponentFormat::Float; native_format->integer = false; m_dst_ofs += sizeof(float) * count_out; @@ -403,8 +405,8 @@ void VertexLoaderARM64::GenerateVertexLoader() MOV(skipped_reg, ARM64Reg::WZR); MOV(saved_count, count_reg); - MOVP2R(stride_reg, g_main_cp_state.array_strides); - MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases); + MOVP2R(stride_reg, 
g_main_cp_state.array_strides.data()); + MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data()); if (need_scale) MOVP2R(scale_reg, scale_factors); @@ -427,7 +429,7 @@ void VertexLoaderARM64::GenerateVertexLoader() m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = m_dst_ofs; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; m_src_ofs += sizeof(u8); m_dst_ofs += sizeof(u32); @@ -448,8 +450,8 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_size = GetLoadSize(load_bytes); load_size <<= 3; - s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg), - load_size); + s32 offset = GetAddressImm(CPArray::Position, m_VtxDesc.low.Position, + EncodeRegTo64(scratch1_reg), load_size); ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset); } @@ -470,7 +472,7 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_bytes = elem_size * 3; int load_size = GetLoadSize(load_bytes); - offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg), + offset = GetAddressImm(CPArray::Normal, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg), load_size << 3); if (offset == -1) @@ -488,10 +490,10 @@ void VertexLoaderARM64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++) { m_native_vtx_decl.colors[i].components = 4; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) @@ -501,22 +503,22 @@ void VertexLoaderARM64::GenerateVertexLoader() m_VtxAttr.GetColorFormat(i) == 
ColorFormat::RGBA4444) align = 2; - s32 offset = GetAddressImm(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i], + s32 offset = GetAddressImm(CPArray::Color0 + i, m_VtxDesc.low.Color[i], EncodeRegTo64(scratch1_reg), align); ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset); m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].offset = m_dst_ofs; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; m_dst_ofs += 4; } } - for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { m_native_vtx_decl.texcoords[i].offset = m_dst_ofs; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 
1 : 2; @@ -527,7 +529,7 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_size = GetLoadSize(load_bytes); load_size <<= 3; - s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i], + s32 offset = GetAddressImm(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i], EncodeRegTo64(scratch1_reg), load_size); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, @@ -538,7 +540,7 @@ void VertexLoaderARM64::GenerateVertexLoader() { m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].enable = true; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]); diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.h b/Source/Core/VideoCommon/VertexLoaderARM64.h index a2190a6965..eccf3f0ad8 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.h +++ b/Source/Core/VideoCommon/VertexLoaderARM64.h @@ -11,6 +11,7 @@ class DataReader; enum class VertexComponentFormat; enum class ComponentFormat; enum class ColorFormat; +enum class CPArray : u8; class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock { @@ -25,8 +26,9 @@ private: u32 m_dst_ofs = 0; Arm64Gen::FixupBranch m_skip_vertex; Arm64Gen::ARM64FloatEmitter m_float_emit; - void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg); - s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align); + void GetVertexAddr(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg); + s32 GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, + u32 align); int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* 
native_format, s32 offset = -1); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 33af56762f..b0922e5a6e 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -12,16 +12,14 @@ #include #include -#include "Common/Assert.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" -#include "Core/DolphinAnalytics.h" #include "Core/HW/Memmap.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/CPMemory.h" -#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" @@ -48,14 +46,21 @@ static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. -u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; +Common::EnumMap cached_arraybases; + +BitSet8 g_main_vat_dirty; +BitSet8 g_preprocess_vat_dirty; +bool g_bases_dirty; // Main only +u8 g_current_vat; // Main only +std::array g_main_vertex_loaders; +std::array g_preprocess_vertex_loaders; void Init() { MarkAllDirty(); - for (auto& map_entry : g_main_cp_state.vertex_loaders) + for (auto& map_entry : g_main_vertex_loaders) map_entry = nullptr; - for (auto& map_entry : g_preprocess_cp_state.vertex_loaders) + for (auto& map_entry : g_preprocess_vertex_loaders) map_entry = nullptr; SETSTAT(g_stats.num_vertex_loaders, 0); } @@ -70,7 +75,7 @@ void Clear() void UpdateVertexArrayPointers() { // Anything to update? - if (!g_main_cp_state.bases_dirty) + if (!g_bases_dirty) return; // Some games such as Burnout 2 can put invalid addresses into @@ -80,27 +85,28 @@ void UpdateVertexArrayPointers() // 12 through 15 are used for loading data into xfmem. // We also only update the array base if the vertex description states we are going to use it. 
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position)) - cached_arraybases[ARRAY_POSITION] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]); + cached_arraybases[CPArray::Position] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Position]); if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal)) - cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]); + cached_arraybases[CPArray::Normal] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Normal]); - for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++) + for (u8 i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++) { if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i])) - cached_arraybases[ARRAY_COLOR0 + i] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR0 + i]); + cached_arraybases[CPArray::Color0 + i] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Color0 + i]); } - for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++) { if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i])) - cached_arraybases[ARRAY_TEXCOORD0 + i] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]); + cached_arraybases[CPArray::TexCoord0 + i] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]); } - g_main_cp_state.bases_dirty = false; + g_bases_dirty = false; } namespace @@ -115,8 +121,8 @@ struct entry void MarkAllDirty() { - g_main_cp_state.attr_dirty = BitSet32::AllTrue(8); - g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8); + g_main_vat_dirty = BitSet8::AllTrue(8); + g_preprocess_vat_dirty = BitSet8::AllTrue(8); } NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl) @@ -140,7 +146,8 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) std::memset(&new_decl, 0, sizeof(new_decl)); new_decl.stride = decl.stride; - 
auto MakeDummyAttribute = [](AttributeFormat& attr, VarType type, int components, bool integer) { + auto MakeDummyAttribute = [](AttributeFormat& attr, ComponentFormat type, int components, + bool integer) { attr.type = type; attr.components = components; attr.offset = 0; @@ -158,32 +165,32 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) if (decl.position.enable) CopyAttribute(new_decl.position, decl.position); else - MakeDummyAttribute(new_decl.position, VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.position, ComponentFormat::Float, 1, false); for (size_t i = 0; i < std::size(new_decl.normals); i++) { if (decl.normals[i].enable) CopyAttribute(new_decl.normals[i], decl.normals[i]); else - MakeDummyAttribute(new_decl.normals[i], VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.normals[i], ComponentFormat::Float, 1, false); } for (size_t i = 0; i < std::size(new_decl.colors); i++) { if (decl.colors[i].enable) CopyAttribute(new_decl.colors[i], decl.colors[i]); else - MakeDummyAttribute(new_decl.colors[i], VAR_UNSIGNED_BYTE, 4, false); + MakeDummyAttribute(new_decl.colors[i], ComponentFormat::UByte, 4, false); } for (size_t i = 0; i < std::size(new_decl.texcoords); i++) { if (decl.texcoords[i].enable) CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]); else - MakeDummyAttribute(new_decl.texcoords[i], VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.texcoords[i], ComponentFormat::Float, 1, false); } if (decl.posmtx.enable) CopyAttribute(new_decl.posmtx, decl.posmtx); else - MakeDummyAttribute(new_decl.posmtx, VAR_UNSIGNED_BYTE, 1, true); + MakeDummyAttribute(new_decl.posmtx, ComponentFormat::UByte, 1, true); return GetOrCreateMatchingFormat(new_decl); } @@ -191,10 +198,12 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false) { CPState* state = preprocess ? 
&g_preprocess_cp_state : &g_main_cp_state; - state->last_id = vtx_attr_group; + BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; + auto& vertex_loaders = preprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders; + g_current_vat = vtx_attr_group; VertexLoaderBase* loader; - if (state->attr_dirty[vtx_attr_group]) + if (attr_dirty[vtx_attr_group]) { // We are not allowed to create a native vertex format on preprocessing as this is on the wrong // thread @@ -224,12 +233,12 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal native = g_renderer->CreateNativeVertexFormat(format); loader->m_native_vertex_format = native.get(); } - state->vertex_loaders[vtx_attr_group] = loader; - state->attr_dirty[vtx_attr_group] = false; + vertex_loaders[vtx_attr_group] = loader; + attr_dirty[vtx_attr_group] = false; } else { - loader = state->vertex_loaders[vtx_attr_group]; + loader = vertex_loaders[vtx_attr_group]; } // Lookup pointers for any vertex arrays. @@ -239,7 +248,8 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal return loader; } -int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess) +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, + bool is_preprocess) { if (!count) return 0; @@ -266,7 +276,8 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // They still need to go through vertex loading, because we need to calculate a zfreeze refrence // slope.
- bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5); + bool cullall = (bpmem.genMode.cullmode == CullMode::All && + primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); DataReader dst = g_vertex_manager->PrepareForAdditionalData( primitive, count, loader->m_native_vtx_decl.stride, cullall); @@ -287,147 +298,3 @@ NativeVertexFormat* GetCurrentVertexFormat() } } // namespace VertexLoaderManager - -void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) -{ - bool update_global_state = !is_preprocess; - CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state; - switch (sub_cmd & CP_COMMAND_MASK) - { - case UNKNOWN_00: - case UNKNOWN_10: - case UNKNOWN_20: - if (!(sub_cmd == UNKNOWN_20 && value == 0)) - { - // All titles using libogc or the official SDK issue 0x20 with value=0 on startup - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND); - DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}", - sub_cmd); - } - break; - - case MATINDEX_A: - if (sub_cmd != MATINDEX_A) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP MATINDEX_A: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - MATINDEX_A, sub_cmd); - } - - if (update_global_state) - VertexShaderManager::SetTexMatrixChangedA(value); - break; - - case MATINDEX_B: - if (sub_cmd != MATINDEX_B) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP MATINDEX_B: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - MATINDEX_B, sub_cmd); - } - - if (update_global_state) - VertexShaderManager::SetTexMatrixChangedB(value); - break; - - case VCD_LO: - if (sub_cmd != VCD_LO) // Stricter than YAGCD - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP 
VCD_LO: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - VCD_LO, sub_cmd); - } - - state->vtx_desc.low.Hex = value; - state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - state->bases_dirty = true; - break; - - case VCD_HI: - if (sub_cmd != VCD_HI) // Stricter than YAGCD - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP VCD_HI: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - VCD_HI, sub_cmd); - } - - state->vtx_desc.high.Hex = value; - state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - state->bases_dirty = true; - break; - - case CP_VAT_REG_A: - if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - case CP_VAT_REG_B: - if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - case CP_VAT_REG_C: - if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - // Pointers to vertex arrays in GC RAM - case ARRAY_BASE: - state->array_bases[sub_cmd & CP_ARRAY_MASK] = - value & CommandProcessor::GetPhysicalAddressMask(); - state->bases_dirty = true; - break; - - case 
ARRAY_STRIDE: - state->array_strides[sub_cmd & CP_ARRAY_MASK] = value & 0xFF; - break; - - default: - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value); - } -} - -void FillCPMemoryArray(u32* memory) -{ - memory[MATINDEX_A] = g_main_cp_state.matrix_index_a.Hex; - memory[MATINDEX_B] = g_main_cp_state.matrix_index_b.Hex; - memory[VCD_LO] = g_main_cp_state.vtx_desc.low.Hex; - memory[VCD_HI] = g_main_cp_state.vtx_desc.high.Hex; - - for (int i = 0; i < CP_NUM_VAT_REG; ++i) - { - memory[CP_VAT_REG_A + i] = g_main_cp_state.vtx_attr[i].g0.Hex; - memory[CP_VAT_REG_B + i] = g_main_cp_state.vtx_attr[i].g1.Hex; - memory[CP_VAT_REG_C + i] = g_main_cp_state.vtx_attr[i].g2.Hex; - } - - for (int i = 0; i < CP_NUM_ARRAYS; ++i) - { - memory[ARRAY_BASE + i] = g_main_cp_state.array_bases[i]; - memory[ARRAY_STRIDE + i] = g_main_cp_state.array_strides[i]; - } -} diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index b43f0d919f..d6bda13c00 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -3,17 +3,24 @@ #pragma once +#include #include #include #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/CPMemory.h" class DataReader; class NativeVertexFormat; struct PortableVertexDeclaration; +namespace OpcodeDecoder +{ +enum class Primitive : u8; +}; + namespace VertexLoaderManager { using NativeVertexFormatMap = @@ -35,12 +42,13 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl); // Returns -1 if buf_size is insufficient, else the amount of bytes consumed -int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess); +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive 
primitive, int count, DataReader src, + bool is_preprocess); NativeVertexFormat* GetCurrentVertexFormat(); // Resolved pointers to array bases. Used by vertex loaders. -extern u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; +extern Common::EnumMap cached_arraybases; void UpdateVertexArrayPointers(); // Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite). @@ -50,4 +58,11 @@ extern u32 position_matrix_index[4]; // VB_HAS_X. Bitmask telling what vertex components are present. extern u32 g_current_components; + +extern BitSet8 g_main_vat_dirty; +extern BitSet8 g_preprocess_vat_dirty; +extern bool g_bases_dirty; // Main only +extern u8 g_current_vat; // Main only +extern std::array g_main_vertex_loaders; +extern std::array g_preprocess_vertex_loaders; } // namespace VertexLoaderManager diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index b204e131d7..40ae508219 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -15,6 +15,7 @@ #include "Common/JitRegister.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" +#include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoaderManager.h" @@ -54,7 +55,7 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att) JitRegister::Register(region, GetCodePtr(), name.c_str()); } -OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute) +OpArg VertexLoaderX64::GetVertexAddr(CPArray array, VertexComponentFormat attribute) { OpArg data = MDisp(src_reg, m_src_ofs); if (IsIndexed(attribute)) @@ -62,7 +63,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute) int bits = attribute == VertexComponentFormat::Index8 ? 
8 : 16; LoadAndSwap(bits, scratch1, data); m_src_ofs += bits / 8; - if (array == ARRAY_POSITION) + if (array == CPArray::Position) { CMP(bits, R(scratch1), Imm8(-1)); m_skip_vertex = J_CC(CC_E, true); @@ -121,7 +122,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com native_format->components = count_out; native_format->enable = true; native_format->offset = m_dst_ofs; - native_format->type = VAR_FLOAT; + native_format->type = ComponentFormat::Float; native_format->integer = false; m_dst_ofs += sizeof(float) * count_out; @@ -420,7 +421,7 @@ void VertexLoaderX64::GenerateVertexLoader() m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = m_dst_ofs; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; m_src_ofs += sizeof(u8); m_dst_ofs += sizeof(u32); @@ -433,7 +434,7 @@ void VertexLoaderX64::GenerateVertexLoader() texmatidx_ofs[i] = m_src_ofs++; } - OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position); + OpArg data = GetVertexAddr(CPArray::Position, m_VtxDesc.low.Position); int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 
2 : 3; ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position); @@ -448,7 +449,7 @@ void VertexLoaderX64::GenerateVertexLoader() { if (!i || m_VtxAttr.g0.NormalIndex3) { - data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal); + data = GetVertexAddr(CPArray::Normal, m_VtxDesc.low.Normal); int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat); data.AddMemOffset(i * elem_size * 3); } @@ -457,27 +458,27 @@ void VertexLoaderX64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++) { if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) { - data = GetVertexAddr(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i]); + data = GetVertexAddr(CPArray::Color0 + i, m_VtxDesc.low.Color[i]); ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i)); m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].offset = m_dst_ofs; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; m_dst_ofs += 4; } } - for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1; if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) { - data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]); + data = GetVertexAddr(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i]); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, m_VtxDesc.low.TexMatIdx[i] ? 
2 : elements, m_VtxAttr.g0.ByteDequant, @@ -487,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader() { m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].enable = true; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i])); if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) diff --git a/Source/Core/VideoCommon/VertexLoaderX64.h b/Source/Core/VideoCommon/VertexLoaderX64.h index 8a3fd5aa6b..6a0cf7b785 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.h +++ b/Source/Core/VideoCommon/VertexLoaderX64.h @@ -10,6 +10,7 @@ enum class VertexComponentFormat; enum class ComponentFormat; enum class ColorFormat; +enum class CPArray : u8; class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock { @@ -23,7 +24,7 @@ private: u32 m_src_ofs = 0; u32 m_dst_ofs = 0; Gen::FixupBranch m_skip_vertex; - Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute); + Gen::OpArg GetVertexAddr(CPArray array, VertexComponentFormat attribute); int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format); diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 4f2e04b0b4..b41272cbf1 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -6,6 +6,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/MsgHandler.h" #include "Common/Swap.h" @@ -78,8 +79,8 @@ void Color_ReadIndex_16b_565(VertexLoader* loader) { const auto index = DataRead(); const u8* const address = - VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * 
g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); u16 value; std::memcpy(&value, address, sizeof(u16)); @@ -91,8 +92,8 @@ template void Color_ReadIndex_24b_888(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read24(address)); } @@ -100,18 +101,18 @@ template void Color_ReadIndex_32b_888x(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read24(address)); } template void Color_ReadIndex_16b_4444(VertexLoader* loader) { - auto const index = DataRead(); + const auto index = DataRead(); const u8* const address = - VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); u16 value; std::memcpy(&value, address, sizeof(u16)); @@ -123,9 +124,9 @@ template void Color_ReadIndex_24b_6666(VertexLoader* loader) { const auto index = DataRead(); - const u8* data = 
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]) - 1; - const u32 val = Common::swap32(data); + const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); + const u32 val = Common::swap24(data); SetCol6666(loader, val); } @@ -133,8 +134,8 @@ template void Color_ReadIndex_32b_8888(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read32(address)); } @@ -166,7 +167,7 @@ void Color_ReadDirect_16b_4444(VertexLoader* loader) void Color_ReadDirect_24b_6666(VertexLoader* loader) { - SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); + SetCol6666(loader, Common::swap24(DataGetPosition())); DataSkip(3); } @@ -175,21 +176,40 @@ void Color_ReadDirect_32b_8888(VertexLoader* loader) SetCol(loader, DataReadU32Unswapped()); } -constexpr TPipelineFunction s_table_read_color[4][6] = { - {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}, - {Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x, - Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}, - {Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, - Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}, - {Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, - Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}, +using Common::EnumMap; + +// These functions are to work around a 
"too many initializer values" error with nested brackets +// C++ does not let you write std::array, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array, 2> b = {1, 2, 3, 4}) +constexpr EnumMap +f(EnumMap in) +{ + return in; +} +constexpr EnumMap g(EnumMap in) +{ + return in; +} + +template +using Table = EnumMap, VertexComponentFormat::Index16>; + +constexpr Table s_table_read_color = { + f({nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}), + f({Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x, + Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}), + f({Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, + Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}), + f({Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, + Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, + Color_ReadIndex_32b_8888}), }; -constexpr u32 s_table_read_color_vertex_size[4][6] = { - {0, 0, 0, 0, 0, 0}, - {2, 3, 4, 2, 3, 4}, - {1, 1, 1, 1, 1, 1}, - {2, 2, 2, 2, 2, 2}, +constexpr Table s_table_read_color_vertex_size = { + g({0u, 0u, 0u, 0u, 0u, 0u}), + g({2u, 3u, 4u, 2u, 3u, 4u}), + g({1u, 1u, 1u, 1u, 1u, 1u}), + g({2u, 2u, 2u, 2u, 2u, 2u}), }; } // Anonymous namespace @@ -200,7 +220,7 @@ u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format) PanicAlertFmt("Invalid color format {}", format); return 0; } - return s_table_read_color_vertex_size[u32(type)][u32(format)]; + return s_table_read_color_vertex_size[type][format]; } TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format) @@ -210,5 +230,5 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co PanicAlertFmt("Invalid color format {}", format); return nullptr; } - return s_table_read_color[u32(type)][u32(format)]; + return s_table_read_color[type][format]; } diff --git 
a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index a69f78c887..254bcacff3 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoader.h" @@ -58,7 +59,7 @@ struct Normal_Direct { static void function([[maybe_unused]] VertexLoader* loader) { - auto const source = reinterpret_cast(DataGetPosition()); + const auto source = reinterpret_cast(DataGetPosition()); ReadIndirect(source); DataSkip(); } @@ -71,10 +72,10 @@ void Normal_Index_Offset() { static_assert(std::is_unsigned_v, "Only unsigned I is sane!"); - auto const index = DataRead(); - auto const data = reinterpret_cast( - VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] + - (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); + const auto index = DataRead(); + const auto data = reinterpret_cast( + VertexLoaderManager::cached_arraybases[CPArray::Normal] + + (index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset); ReadIndirect(data); } @@ -98,39 +99,6 @@ struct Normal_Index_Indices3 static constexpr u32 size = sizeof(I) * 3; }; -enum NormalType -{ - NRM_NOT_PRESENT = 0, - NRM_DIRECT = 1, - NRM_INDEX8 = 2, - NRM_INDEX16 = 3, - NUM_NRM_TYPE -}; - -enum NormalFormat -{ - FORMAT_UBYTE = 0, - FORMAT_BYTE = 1, - FORMAT_USHORT = 2, - FORMAT_SHORT = 3, - FORMAT_FLOAT = 4, - NUM_NRM_FORMAT -}; - -enum NormalElements -{ - NRM_NBT = 0, - NRM_NBT3 = 1, - NUM_NRM_ELEMENTS -}; - -enum NormalIndices -{ - NRM_INDICES1 = 0, - NRM_INDICES3 = 1, - NUM_NRM_INDICES -}; - struct Set { template @@ -145,83 +113,88 @@ struct Set TPipelineFunction function; }; -using Formats = std::array; -using Elements = std::array; -using Indices = std::array; -using Types = std::array; +using Common::EnumMap; +using Formats = EnumMap; 
+using Elements = EnumMap; +using Indices = std::array; +using Types = EnumMap; constexpr Types InitializeTable() { Types table{}; - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + using VCF = VertexComponentFormat; + using NCC = NormalComponentCount; + using FMT = ComponentFormat; - // Same as above - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct(); + 
table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Short] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Float] = Normal_Direct(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + // Same as above, since there are no indices + table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Short] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Float] = Normal_Direct(); - // Same as above for NRM_NBT - 
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::UByte] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::Byte] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::UShort] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::Short] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::Float] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = 
Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + // Same for NormalComponentCount::N; differs for NBT + table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index8][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3(); - // Same as above for NRM_NBT - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index(); + 
table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::UByte] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Byte] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::UShort] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Short] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Float] = Normal_Index(); + + // Same for NormalComponentCount::N; differs for NBT + table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index16][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3(); return table; } @@ -230,14 +203,14 @@ constexpr Types s_table = InitializeTable(); } // Anonymous namespace u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3) + NormalComponentCount elements, bool index3) { - return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size; + return s_table[type][index3][elements][format].gc_size; } TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3) + NormalComponentCount elements, bool index3) { - return 
s_table[u32(type)][index3][u32(elements)][u32(format)].function; + return s_table[type][index3][elements][format].function; } diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.h b/Source/Core/VideoCommon/VertexLoader_Normal.h index f416c590c9..30674159fb 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.h +++ b/Source/Core/VideoCommon/VertexLoader_Normal.h @@ -14,8 +14,8 @@ class VertexLoader_Normal { public: static u32 GetSize(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3); + NormalComponentCount elements, bool index3); static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3); + NormalComponentCount elements, bool index3); }; diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 273cccebc3..0fe8e7ba72 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Swap.h" #include "VideoCommon/DataReader.h" @@ -59,8 +60,8 @@ void Pos_ReadIndex(VertexLoader* loader) const auto index = DataRead(); loader->m_vertexSkip = index == std::numeric_limits::max(); const auto data = - reinterpret_cast(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] + - (index * g_main_cp_state.array_strides[ARRAY_POSITION])); + reinterpret_cast(VertexLoaderManager::cached_arraybases[CPArray::Position] + + (index * g_main_cp_state.array_strides[CPArray::Position])); const auto scale = loader->m_posScale; DataReader dst(g_vertex_manager_write_ptr, nullptr); @@ -76,138 +77,109 @@ void Pos_ReadIndex(VertexLoader* loader) LOG_VTX(); } -constexpr TPipelineFunction s_table_read_position[4][8][2] = { - { - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - 
nullptr, - nullptr, - }, - }, - { - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - }, - { - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - }, - { - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - }, +using Common::EnumMap; + +// These functions are to work around a "too many initializer values" error with nested brackets +// C++ does not let you write std::array, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array, 2> b = {1, 2, 3, 4}) +constexpr EnumMap e(TPipelineFunction xy, + TPipelineFunction xyz) +{ + return {xy, xyz}; +} +constexpr EnumMap e(u32 xy, u32 xyz) +{ + return {xy, xyz}; +} + +constexpr EnumMap, ComponentFormat::Float> +f(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +constexpr EnumMap, ComponentFormat::Float> +g(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +template +using Table = EnumMap, ComponentFormat::Float>, + VertexComponentFormat::Index16>; + +constexpr Table s_table_read_position = { + f({ + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + }), + f({ + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + }), + f({ + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + }), + f({ + 
e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + }), }; -constexpr u32 s_table_read_position_vertex_size[4][8][2] = { - { - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - }, - { - {2, 3}, - {2, 3}, - {4, 6}, - {4, 6}, - {8, 12}, - }, - { - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - }, - { - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - }, +constexpr Table s_table_read_position_vertex_size = { + g({ + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + }), + g({ + e(2, 3), + e(2, 3), + e(4, 6), + e(4, 6), + e(8, 12), + }), + g({ + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + }), + g({ + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + }), }; } // Anonymous namespace u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format, CoordComponentCount elements) { - return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)]; + return s_table_read_position_vertex_size[type][format][elements]; } TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type, ComponentFormat format, CoordComponentCount elements) { - return s_table_read_position[u32(type)][u32(format)][u32(elements)]; + return s_table_read_position[type][format][elements]; } diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index aa01ab0bf3..89891df5a8 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -55,8 +55,8 @@ void TexCoord_ReadIndex(VertexLoader* loader) const auto index = DataRead(); const auto data = reinterpret_cast( - VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + - (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 
+ loader->m_tcIndex] + + (index * g_main_cp_state.array_strides[CPArray::TexCoord0 + loader->m_tcIndex])); const auto scale = loader->m_tcScale[loader->m_tcIndex]; DataReader dst(g_vertex_manager_write_ptr, nullptr); @@ -67,140 +67,110 @@ void TexCoord_ReadIndex(VertexLoader* loader) ++loader->m_tcIndex; } -constexpr TPipelineFunction s_table_read_tex_coord[4][8][2] = { - { - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - }, - { - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - }, - { - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - }, - { - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - }, +using Common::EnumMap; +// These functions are to work around a "too many initializer values" error with nested brackets +// C++ does not let you write std::array, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array, 2> b = {1, 2, 3, 4}) +constexpr EnumMap e(TPipelineFunction s, + TPipelineFunction st) +{ + return {s, st}; +} +constexpr EnumMap e(u32 s, u32 st) +{ + return {s, st}; +} + +constexpr EnumMap, ComponentFormat::Float> +f(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +constexpr EnumMap, ComponentFormat::Float> +g(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +template +using 
Table = EnumMap, ComponentFormat::Float>, + VertexComponentFormat::Index16>; + +constexpr Table s_table_read_tex_coord = { + f({ + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + }), + f({ + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + }), + f({ + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + }), + f({ + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + }), }; -constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = { - { - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - }, - { - {1, 2}, - {1, 2}, - {2, 4}, - {2, 4}, - {4, 8}, - }, - { - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - }, - { - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - }, +constexpr Table s_table_read_tex_coord_vertex_size = { + g({ + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + }), + g({ + e(1, 2), + e(1, 2), + e(2, 4), + e(2, 4), + e(4, 8), + }), + g({ + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + }), + g({ + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + }), }; } // Anonymous namespace u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format, TexComponentCount elements) { - return s_table_read_tex_coord_vertex_size[u32(type)][u32(format)][u32(elements)]; + return s_table_read_tex_coord_vertex_size[type][format][elements]; } TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type, 
ComponentFormat format, TexComponentCount elements) { - return s_table_read_tex_coord[u32(type)][u32(format)][u32(elements)]; + return s_table_read_tex_coord[type][format][elements]; } TPipelineFunction VertexLoader_TextCoord::GetDummyFunction() diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 63213edd3a..5fa85b2761 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -10,6 +10,7 @@ #include "Common/BitSet.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "Common/MathUtil.h" @@ -38,8 +39,10 @@ std::unique_ptr g_vertex_manager; +using OpcodeDecoder::Primitive; + // GX primitive -> RenderState primitive, no primitive restart -constexpr std::array primitive_from_gx{{ +constexpr Common::EnumMap primitive_from_gx{ PrimitiveType::Triangles, // GX_DRAW_QUADS PrimitiveType::Triangles, // GX_DRAW_QUADS_2 PrimitiveType::Triangles, // GX_DRAW_TRIANGLES @@ -48,10 +51,10 @@ constexpr std::array primitive_from_gx{{ PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Points, // GX_DRAW_POINTS -}}; +}; // GX primitive -> RenderState primitive, using primitive restart -constexpr std::array primitive_from_gx_pr{{ +constexpr Common::EnumMap primitive_from_gx_pr{ PrimitiveType::TriangleStrip, // GX_DRAW_QUADS PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2 PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES @@ -60,7 +63,7 @@ constexpr std::array primitive_from_gx_pr{{ PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Points, // GX_DRAW_POINTS -}}; +}; // Due to the BT.601 standard which the GameCube is based on being a compromise // between PAL and NTSC, neither standard gets square pixels. 
They are each off @@ -107,13 +110,13 @@ u32 VertexManagerBase::GetRemainingSize() const return static_cast(m_end_buffer_pointer - m_cur_buffer_pointer); } -void VertexManagerBase::AddIndices(int primitive, u32 num_vertices) +void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices) { m_index_generator.AddIndices(primitive, num_vertices); } -DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, - bool cullall) +DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, + u32 count, u32 stride, bool cullall) { // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently. g_framebuffer_manager->FlushEFBPokes(); @@ -185,7 +188,7 @@ void VertexManagerBase::FlushData(u32 count, u32 stride) m_cur_buffer_pointer += count * stride; } -u32 VertexManagerBase::GetRemainingIndices(int primitive) const +u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const { const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen(); @@ -193,22 +196,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const { switch (primitive) { - case OpcodeDecoder::GX_DRAW_QUADS: - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS_2: return index_len / 5 * 4; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: return index_len / 4 * 3; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: return index_len / 1 - 1; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 6 * 4 + 1; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: return index_len; - case OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: return index_len / 2 + 1; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: return index_len; 
default: @@ -219,22 +222,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const { switch (primitive) { - case OpcodeDecoder::GX_DRAW_QUADS: - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS_2: return index_len / 6 * 4; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: return index_len; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: return index_len / 3 + 2; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 3 + 2; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: return index_len; - case OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: return index_len / 2 + 1; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: return index_len; default: diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index f41be70836..b3dd49aa61 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -35,6 +35,11 @@ enum TexelBufferFormat : u32 NUM_TEXEL_BUFFER_FORMATS }; +namespace OpcodeDecoder +{ +enum class Primitive : u8; +}; + class VertexManagerBase { private: @@ -93,8 +98,9 @@ public: virtual bool Initialize(); PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } - void AddIndices(int primitive, u32 num_vertices); - DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); + void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); + DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, + bool cullall); void FlushData(u32 count, u32 stride); void Flush(); @@ -163,7 +169,7 @@ protected: virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex); u32 GetRemainingSize() const; - u32 
GetRemainingIndices(int primitive) const; + u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const; void CalculateZSlope(NativeVertexFormat* format); void LoadTextures(); diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 1b0bf55de7..25c7f79b30 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -23,6 +23,7 @@ #include "VideoCommon/FreeLookCamera.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 2a44497ec7..189f695f41 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -13,8 +13,6 @@ #include "Common/EnumFormatter.h" #include "VideoCommon/CPMemory.h" -class DataReader; - constexpr size_t NUM_XF_COLOR_CHANNELS = 2; // Lighting @@ -454,10 +452,10 @@ struct XFMemory u32 unk9[8]; // 0x1048 - 0x104f PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057 }; -static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058); +static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END); extern XFMemory xfmem; -void LoadXFReg(u32 transferSize, u32 address, DataReader src); -void LoadIndexedXF(u32 val, int array); -void PreprocessIndexedXF(u32 val, int refarray); +void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data); +void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size); +void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index ebf9e8efab..665e2ac22d 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -12,7 +12,6 @@ #include "Core/HW/Memmap.h" 
#include "VideoCommon/CPMemory.h" -#include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelShaderManager.h" @@ -26,16 +25,10 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress) VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); } -static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) +static void XFRegWritten(u32 address, u32 value) { - u32 address = baseAddress; - u32 dataIndex = 0; - - while (transferSize > 0 && address < XFMEM_REGISTERS_END) + if (address >= XFMEM_REGISTERS_START && address < XFMEM_REGISTERS_END) { - u32 newValue = src.Peek(dataIndex * sizeof(u32)); - u32 nextAddress = address + 1; - switch (address) { case XFMEM_ERROR: @@ -44,12 +37,12 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_STATE1: // internal state 1 case XFMEM_CLOCK: case XFMEM_SETGPMETRIC: - nextAddress = 0x1007; + // Not implemented break; case XFMEM_CLIPDISABLE: { - ClipDisable setting{.hex = newValue}; + ClipDisable setting{.hex = value}; if (setting.disable_clipping_detection) DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0); if (setting.disable_trivial_rejection) @@ -63,7 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) break; case XFMEM_SETNUMCHAN: - if (xfmem.numChan.numColorChans != (newValue & 3)) + if (xfmem.numChan.numColorChans != (value & 3)) g_vertex_manager->Flush(); VertexShaderManager::SetLightingConfigChanged(); break; @@ -72,7 +65,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETCHAN1_AMBCOLOR: { u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR; - if (xfmem.ambColor[chan] != newValue) + if (xfmem.ambColor[chan] != value) { g_vertex_manager->Flush(); VertexShaderManager::SetMaterialColorChanged(chan); @@ -84,7 +77,7 @@ static void XFRegWritten(int transferSize, u32 
baseAddress, DataReader src) case XFMEM_SETCHAN1_MATCOLOR: { u8 chan = address - XFMEM_SETCHAN0_MATCOLOR; - if (xfmem.matColor[chan] != newValue) + if (xfmem.matColor[chan] != value) { g_vertex_manager->Flush(); VertexShaderManager::SetMaterialColorChanged(chan + 2); @@ -96,22 +89,22 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETCHAN1_COLOR: case XFMEM_SETCHAN0_ALPHA: // Channel Alpha case XFMEM_SETCHAN1_ALPHA: - if (((u32*)&xfmem)[address] != (newValue & 0x7fff)) + if (((u32*)&xfmem)[address] != (value & 0x7fff)) g_vertex_manager->Flush(); VertexShaderManager::SetLightingConfigChanged(); break; case XFMEM_DUALTEX: - if (xfmem.dualTexTrans.enabled != bool(newValue & 1)) + if (xfmem.dualTexTrans.enabled != bool(value & 1)) g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(-1); break; case XFMEM_SETMATRIXINDA: - VertexShaderManager::SetTexMatrixChangedA(newValue); + VertexShaderManager::SetTexMatrixChangedA(value); break; case XFMEM_SETMATRIXINDB: - VertexShaderManager::SetTexMatrixChangedB(newValue); + VertexShaderManager::SetTexMatrixChangedB(value); break; case XFMEM_SETVIEWPORT: @@ -124,8 +117,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) VertexShaderManager::SetViewportChanged(); PixelShaderManager::SetViewportChanged(); GeometryShaderManager::SetViewportChanged(); - - nextAddress = XFMEM_SETVIEWPORT + 6; break; case XFMEM_SETPROJECTION: @@ -138,12 +129,10 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) g_vertex_manager->Flush(); VertexShaderManager::SetProjectionChanged(); GeometryShaderManager::SetProjectionChanged(); - - nextAddress = XFMEM_SETPROJECTION + 7; break; case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens - if (xfmem.numTexGen.numTexGens != (newValue & 15)) + if (xfmem.numTexGen.numTexGens != (value & 15)) g_vertex_manager->Flush(); break; @@ -157,8 +146,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, 
DataReader src) case XFMEM_SETTEXMTXINFO + 7: g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO); - - nextAddress = XFMEM_SETTEXMTXINFO + 8; break; case XFMEM_SETPOSTMTXINFO: @@ -171,8 +158,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETPOSTMTXINFO + 7: g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO); - - nextAddress = XFMEM_SETPOSTMTXINFO + 8; break; // -------------- @@ -189,7 +174,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case 0x104e: case 0x104f: DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); - DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, newValue); + DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, value); break; case 0x1013: @@ -200,83 +185,69 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) default: DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); - WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, newValue); + WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, value); break; } - - int transferred = nextAddress - address; - address = nextAddress; - - transferSize -= transferred; - dataIndex += transferred; } } -void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) +void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data) { - // do not allow writes past registers - if (baseAddress + transferSize > XFMEM_REGISTERS_END) + if (base_address > XFMEM_REGISTERS_END) { - WARN_LOG_FMT(VIDEO, "XF load exceeds address space: {:x} {} bytes", baseAddress, transferSize); - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); + WARN_LOG_FMT(VIDEO, "XF load base address past end of address space: {:x} {} bytes", + base_address, transfer_size); + return; + } - if 
(baseAddress >= XFMEM_REGISTERS_END) - transferSize = 0; - else - transferSize = XFMEM_REGISTERS_END - baseAddress; + u32 end_address = base_address + transfer_size; // exclusive + + // do not allow writes past registers + if (end_address > XFMEM_REGISTERS_END) + { + WARN_LOG_FMT(VIDEO, "XF load ends past end of address space: {:x} {} bytes", base_address, + transfer_size); + end_address = XFMEM_REGISTERS_END; } // write to XF mem - if (baseAddress < XFMEM_REGISTERS_START && transferSize > 0) + if (base_address < XFMEM_REGISTERS_START) { - u32 end = baseAddress + transferSize; + const u32 xf_mem_base = base_address; + u32 xf_mem_transfer_size = transfer_size; - u32 xfMemBase = baseAddress; - u32 xfMemTransferSize = transferSize; - - if (end >= XFMEM_REGISTERS_START) + if (end_address > XFMEM_REGISTERS_START) { - xfMemTransferSize = XFMEM_REGISTERS_START - baseAddress; - - baseAddress = XFMEM_REGISTERS_START; - transferSize = end - XFMEM_REGISTERS_START; - } - else - { - transferSize = 0; + xf_mem_transfer_size = XFMEM_REGISTERS_START - base_address; + base_address = XFMEM_REGISTERS_START; } - XFMemWritten(xfMemTransferSize, xfMemBase); - for (u32 i = 0; i < xfMemTransferSize; i++) + XFMemWritten(xf_mem_transfer_size, xf_mem_base); + for (u32 i = 0; i < xf_mem_transfer_size; i++) { - ((u32*)&xfmem)[xfMemBase + i] = src.Read(); + ((u32*)&xfmem)[xf_mem_base + i] = Common::swap32(data); + data += 4; } } // write to XF regs - if (transferSize > 0) + if (base_address >= XFMEM_REGISTERS_START) { - XFRegWritten(transferSize, baseAddress, src); - for (u32 i = 0; i < transferSize; i++) + for (u32 address = base_address; address < end_address; address++) { - ((u32*)&xfmem)[baseAddress + i] = src.Read(); + const u32 value = Common::swap32(data); + + XFRegWritten(address, value); + ((u32*)&xfmem)[address] = value; + + data += 4; } } } -constexpr std::tuple ExtractIndexedXF(u32 val) -{ - const u32 index = val >> 16; - const u32 address = val & 0xFFF; // check mask - const u32 
size = ((val >> 12) & 0xF) + 1; - - return {index, address, size}; -} - // TODO - verify that it is correct. Seems to work, though. -void LoadIndexedXF(u32 val, int refarray) +void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size) { - const auto [index, address, size] = ExtractIndexedXF(val); // load stuff from array to address in xf mem u32* currData = (u32*)(&xfmem) + address; @@ -287,8 +258,8 @@ void LoadIndexedXF(u32 val, int refarray) } else { - newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + - g_main_cp_state.array_strides[refarray] * index); + newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + + g_main_cp_state.array_strides[array] * index); } bool changed = false; for (u32 i = 0; i < size; ++i) @@ -307,12 +278,10 @@ void LoadIndexedXF(u32 val, int refarray) } } -void PreprocessIndexedXF(u32 val, int refarray) +void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size) { - const auto [index, address, size] = ExtractIndexedXF(val); - - const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + - g_preprocess_cp_state.array_strides[refarray] * index); + const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] + + g_preprocess_cp_state.array_strides[array] * index); const size_t buf_size = size * sizeof(u32); Fifo::PushFifoAuxBuffer(new_data, buf_size); @@ -581,13 +550,9 @@ std::string GetXFMemDescription(u32 address, u32 value) } } -std::pair GetXFTransferInfo(const u8* data) +std::pair GetXFTransferInfo(u16 base_address, u8 transfer_size, + const u8* data) { - const u32 cmd = Common::swap32(data); - data += 4; - u32 base_address = cmd & 0xFFFF; - const u32 transfer_size = ((cmd >> 16) & 15) + 1; - if (base_address > XFMEM_REGISTERS_END) { return std::make_pair("Invalid XF Transfer", "Base address past end of address space"); @@ -655,10 +620,9 @@ std::pair GetXFTransferInfo(const u8* data) return std::make_pair(fmt::to_string(name), 
fmt::to_string(desc)); } -std::pair GetXFIndexedLoadInfo(u8 array, u32 value) +std::pair GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address, + u8 size) { - const auto [index, address, size] = ExtractIndexedXF(value); - const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size, address, array, index); fmt::memory_buffer written; diff --git a/Source/Core/VideoCommon/XFStructs.h b/Source/Core/VideoCommon/XFStructs.h index 8f839baf97..caf197b7f0 100644 --- a/Source/Core/VideoCommon/XFStructs.h +++ b/Source/Core/VideoCommon/XFStructs.h @@ -11,5 +11,7 @@ std::pair GetXFRegInfo(u32 address, u32 value); std::string GetXFMemName(u32 address); std::string GetXFMemDescription(u32 address, u32 value); -std::pair GetXFTransferInfo(const u8* data); -std::pair GetXFIndexedLoadInfo(u8 array, u32 value); +std::pair GetXFTransferInfo(u16 base_address, u8 transfer_size, + const u8* data); +std::pair GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address, + u8 size); diff --git a/Source/UnitTests/Common/EnumFormatterTest.cpp b/Source/UnitTests/Common/EnumFormatterTest.cpp index 55e03152ef..793328b0dd 100644 --- a/Source/UnitTests/Common/EnumFormatterTest.cpp +++ b/Source/UnitTests/Common/EnumFormatterTest.cpp @@ -46,6 +46,12 @@ TEST(EnumUtil, Enum1) EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */"); EXPECT_EQ(fmt::format("{:s}", static_cast(3)), "0x3u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast(4)), "0x4u /* Invalid */"); + + EXPECT_EQ(fmt::format("{:n}", Enum1::A), "A"); + EXPECT_EQ(fmt::format("{:n}", Enum1::B), "B"); + EXPECT_EQ(fmt::format("{:n}", Enum1::C), "C"); + EXPECT_EQ(fmt::format("{:n}", static_cast(3)), "Invalid (3)"); + EXPECT_EQ(fmt::format("{:n}", static_cast(4)), "Invalid (4)"); } TEST(EnumUtil, Enum2) @@ -63,4 +69,11 @@ TEST(EnumUtil, Enum2) EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */"); EXPECT_EQ(fmt::format("{:s}", static_cast(4)), "0x4u /* Invalid */"); 
EXPECT_EQ(fmt::format("{:s}", static_cast(-1)), "0xffffffffu /* Invalid */"); + + EXPECT_EQ(fmt::format("{:n}", Enum2::D), "D"); + EXPECT_EQ(fmt::format("{:n}", Enum2::E), "E"); + EXPECT_EQ(fmt::format("{:n}", static_cast(2)), "Invalid (2)"); + EXPECT_EQ(fmt::format("{:n}", Enum2::F), "F"); + EXPECT_EQ(fmt::format("{:n}", static_cast(4)), "Invalid (4)"); + EXPECT_EQ(fmt::format("{:n}", static_cast(-1)), "Invalid (-1)"); } diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 81b3e5ee53..e72fe28c29 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -174,8 +174,8 @@ TEST_P(VertexLoaderParamTest, PositionAll) Input(i); else Input(i); - VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); - g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size; + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = elem_count * elem_size; } CreateAndCheckSizes(input_size, elem_count * sizeof(float)); for (float value : values) @@ -243,8 +243,8 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY) CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float)); Input(1); Input(0); - VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); - g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;) + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = sizeof(float); // ;) Input(1.f); Input(2.f); Input(3.f); @@ -357,8 +357,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++) { - VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer(); - g_main_cp_state.array_strides[i] = 129; + VertexLoaderManager::cached_arraybases[static_cast(i)] = m_src.GetPointer(); + 
g_main_cp_state.array_strides[static_cast(i)] = 129; } // This test is only done 100x in a row since it's ~20x slower using the