Merge pull request #9718 from Pokechu22/better-fifo-analyzer-part-3

Fifo analyzer improvements, part 3
This commit is contained in:
JMC47 2021-12-20 14:27:14 -05:00 committed by GitHub
commit 32fed91b0d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
66 changed files with 2757 additions and 2695 deletions

View File

@ -55,9 +55,9 @@ public:
constexpr auto parse(fmt::format_parse_context& ctx) constexpr auto parse(fmt::format_parse_context& ctx)
{ {
auto it = ctx.begin(), end = ctx.end(); auto it = ctx.begin(), end = ctx.end();
// 'u' for user display, 's' for shader generation // 'u' for user display, 's' for shader generation, 'n' for name only
if (it != end && (*it == 'u' || *it == 's')) if (it != end && (*it == 'u' || *it == 's' || *it == 'n'))
formatting_for_shader = (*it++ == 's'); format_type = *it++;
return it; return it;
} }
@ -68,19 +68,24 @@ public:
const auto value_u = static_cast<std::make_unsigned_t<T>>(value_s); // Always unsigned const auto value_u = static_cast<std::make_unsigned_t<T>>(value_s); // Always unsigned
const bool has_name = m_names.InBounds(e) && m_names[e] != nullptr; const bool has_name = m_names.InBounds(e) && m_names[e] != nullptr;
if (!formatting_for_shader) switch (format_type)
{ {
default:
case 'u':
if (has_name) if (has_name)
return fmt::format_to(ctx.out(), "{} ({})", m_names[e], value_s); return fmt::format_to(ctx.out(), "{} ({})", m_names[e], value_s);
else else
return fmt::format_to(ctx.out(), "Invalid ({})", value_s); return fmt::format_to(ctx.out(), "Invalid ({})", value_s);
} case 's':
else
{
if (has_name) if (has_name)
return fmt::format_to(ctx.out(), "{:#x}u /* {} */", value_u, m_names[e]); return fmt::format_to(ctx.out(), "{:#x}u /* {} */", value_u, m_names[e]);
else else
return fmt::format_to(ctx.out(), "{:#x}u /* Invalid */", value_u); return fmt::format_to(ctx.out(), "{:#x}u /* Invalid */", value_u);
case 'n':
if (has_name)
return fmt::format_to(ctx.out(), "{}", m_names[e]);
else
return fmt::format_to(ctx.out(), "Invalid ({})", value_s);
} }
} }
@ -92,5 +97,5 @@ protected:
private: private:
const array_type m_names; const array_type m_names;
bool formatting_for_shader = false; char format_type = 'u';
}; };

View File

@ -103,16 +103,10 @@ add_library(core
DSP/LabelMap.h DSP/LabelMap.h
DSPEmulator.cpp DSPEmulator.cpp
DSPEmulator.h DSPEmulator.h
FifoPlayer/FifoAnalyzer.cpp
FifoPlayer/FifoAnalyzer.h
FifoPlayer/FifoDataFile.cpp FifoPlayer/FifoDataFile.cpp
FifoPlayer/FifoDataFile.h FifoPlayer/FifoDataFile.h
FifoPlayer/FifoPlaybackAnalyzer.cpp
FifoPlayer/FifoPlaybackAnalyzer.h
FifoPlayer/FifoPlayer.cpp FifoPlayer/FifoPlayer.cpp
FifoPlayer/FifoPlayer.h FifoPlayer/FifoPlayer.h
FifoPlayer/FifoRecordAnalyzer.cpp
FifoPlayer/FifoRecordAnalyzer.h
FifoPlayer/FifoRecorder.cpp FifoPlayer/FifoRecorder.cpp
FifoPlayer/FifoRecorder.h FifoPlayer/FifoRecorder.h
FreeLookConfig.cpp FreeLookConfig.cpp

View File

@ -1,294 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include <numeric>
#include "Common/Assert.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
namespace FifoAnalyzer
{
namespace
{
// Returns the byte under the cursor and moves the cursor one byte forward.
u8 ReadFifo8(const u8*& data)
{
  return *data++;
}
// Reads a 16-bit value at the cursor through Common::swap16 and moves the cursor past it.
u16 ReadFifo16(const u8*& data)
{
  const u8* const field = data;
  data += sizeof(u16);
  return Common::swap16(field);
}
// Reads a 32-bit value at the cursor through Common::swap32 and moves the cursor past it.
u32 ReadFifo32(const u8*& data)
{
  const u8* const field = data;
  data += sizeof(u32);
  return Common::swap32(field);
}
std::array<int, 21> CalculateVertexElementSizes(int vatIndex, const CPMemory& cpMem)
{
const TVtxDesc& vtxDesc = cpMem.vtxDesc;
const VAT& vtxAttr = cpMem.vtxAttr[vatIndex];
// Colors
const std::array<ColorFormat, 2> colComp{
vtxAttr.g0.Color0Comp,
vtxAttr.g0.Color1Comp,
};
const std::array<TexComponentCount, 8> tcElements{
vtxAttr.g0.Tex0CoordElements, vtxAttr.g1.Tex1CoordElements, vtxAttr.g1.Tex2CoordElements,
vtxAttr.g1.Tex3CoordElements, vtxAttr.g1.Tex4CoordElements, vtxAttr.g2.Tex5CoordElements,
vtxAttr.g2.Tex6CoordElements, vtxAttr.g2.Tex7CoordElements,
};
const std::array<ComponentFormat, 8> tcFormat{
vtxAttr.g0.Tex0CoordFormat, vtxAttr.g1.Tex1CoordFormat, vtxAttr.g1.Tex2CoordFormat,
vtxAttr.g1.Tex3CoordFormat, vtxAttr.g1.Tex4CoordFormat, vtxAttr.g2.Tex5CoordFormat,
vtxAttr.g2.Tex6CoordFormat, vtxAttr.g2.Tex7CoordFormat,
};
std::array<int, 21> sizes{};
// Add position and texture matrix indices
sizes[0] = vtxDesc.low.PosMatIdx;
for (size_t i = 0; i < vtxDesc.low.TexMatIdx.Size(); ++i)
{
sizes[i + 1] = vtxDesc.low.TexMatIdx[i];
}
// Position
sizes[9] = VertexLoader_Position::GetSize(vtxDesc.low.Position, vtxAttr.g0.PosFormat,
vtxAttr.g0.PosElements);
// Normals
if (vtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
sizes[10] = VertexLoader_Normal::GetSize(vtxDesc.low.Normal, vtxAttr.g0.NormalFormat,
vtxAttr.g0.NormalElements, vtxAttr.g0.NormalIndex3);
}
else
{
sizes[10] = 0;
}
// Colors
for (size_t i = 0; i < vtxDesc.low.Color.Size(); i++)
{
int size = 0;
switch (vtxDesc.low.Color[i])
{
case VertexComponentFormat::NotPresent:
break;
case VertexComponentFormat::Direct:
switch (colComp[i])
{
case ColorFormat::RGB565:
size = 2;
break;
case ColorFormat::RGB888:
size = 3;
break;
case ColorFormat::RGB888x:
size = 4;
break;
case ColorFormat::RGBA4444:
size = 2;
break;
case ColorFormat::RGBA6666:
size = 3;
break;
case ColorFormat::RGBA8888:
size = 4;
break;
default:
ASSERT(0);
break;
}
break;
case VertexComponentFormat::Index8:
size = 1;
break;
case VertexComponentFormat::Index16:
size = 2;
break;
}
sizes[11 + i] = size;
}
// Texture coordinates
for (size_t i = 0; i < tcFormat.size(); i++)
{
sizes[13 + i] =
VertexLoader_TextCoord::GetSize(vtxDesc.high.TexCoord[i], tcFormat[i], tcElements[i]);
}
return sizes;
}
} // Anonymous namespace
bool s_DrawingObject;
FifoAnalyzer::CPMemory s_CpMem;
// Decodes the single FIFO command that starts at `data`.
// Side effects: updates s_DrawingObject (true after a primitive command, false after a register
// load) and feeds CP register writes into s_CpMem. When `mode` is DecodeMode::Record, indexed XF
// loads and vertex data are additionally forwarded to FifoRecordAnalyzer.
// Returns the number of bytes the command occupies, or 0 if the opcode is not recognized.
u32 AnalyzeCommand(const u8* data, DecodeMode mode)
{
// Remember where the command began so its total size can be computed at the end.
const u8* dataStart = data;
int cmd = ReadFifo8(data);
switch (cmd)
{
// Opcodes that consist of the opcode byte only.
case OpcodeDecoder::GX_NOP:
case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS:
case OpcodeDecoder::GX_CMD_INVL_VC:
break;
case OpcodeDecoder::GX_LOAD_CP_REG:
{
s_DrawingObject = false;
// One byte of register address followed by a 32-bit value.
u32 cmd2 = ReadFifo8(data);
u32 value = ReadFifo32(data);
LoadCPReg(cmd2, value, s_CpMem);
break;
}
case OpcodeDecoder::GX_LOAD_XF_REG:
{
s_DrawingObject = false;
u32 cmd2 = ReadFifo32(data);
// Bits 16-19 of the header word, plus one, give the number of 4-byte units that follow.
u8 streamSize = ((cmd2 >> 16) & 15) + 1;
data += streamSize * 4;
break;
}
case OpcodeDecoder::GX_LOAD_INDX_A:
case OpcodeDecoder::GX_LOAD_INDX_B:
case OpcodeDecoder::GX_LOAD_INDX_C:
case OpcodeDecoder::GX_LOAD_INDX_D:
{
s_DrawingObject = false;
// Maps the opcode to an array slot starting at 0xc.
// NOTE(review): this presumes the four INDX opcodes are spaced 8 apart - confirm.
int array = 0xc + (cmd - OpcodeDecoder::GX_LOAD_INDX_A) / 8;
u32 value = ReadFifo32(data);
if (mode == DecodeMode::Record)
FifoRecordAnalyzer::ProcessLoadIndexedXf(value, array);
break;
}
case OpcodeDecoder::GX_CMD_CALL_DL:
// The recorder should have expanded display lists into the fifo stream and skipped the call to
// start them
// That is done to make it easier to track where memory is updated
ASSERT(false);
// Skip the 8 bytes of operands so decoding can continue past the unexpected call.
data += 8;
break;
case OpcodeDecoder::GX_LOAD_BP_REG:
{
s_DrawingObject = false;
// The 32-bit operand is consumed but not interpreted here.
ReadFifo32(data);
break;
}
default:
// Any opcode with the top bit set is treated as a primitive (draw) command.
if (cmd & 0x80)
{
s_DrawingObject = true;
const std::array<int, 21> sizes =
CalculateVertexElementSizes(cmd & OpcodeDecoder::GX_VAT_MASK, s_CpMem);
// Determine offset of each element that might be a vertex array
// The first 9 elements are never vertex arrays so we just accumulate their sizes.
int offset = std::accumulate(sizes.begin(), sizes.begin() + 9, 0u);
std::array<int, NUM_VERTEX_COMPONENT_ARRAYS> offsets;
for (size_t i = 0; i < offsets.size(); ++i)
{
offsets[i] = offset;
offset += sizes[i + 9];
}
// After the loop, the running offset equals the size of one whole vertex.
const int vertexSize = offset;
const int numVertices = ReadFifo16(data);
if (mode == DecodeMode::Record && numVertices > 0)
{
// Hand each component's data to the record analyzer, starting at its offset within the
// first vertex.
for (size_t i = 0; i < offsets.size(); ++i)
{
FifoRecordAnalyzer::WriteVertexArray(static_cast<int>(i), data + offsets[i], vertexSize,
numVertices);
}
}
// Skip over the inline vertex data.
data += numVertices * vertexSize;
}
else
{
PanicAlertFmt("FifoPlayer: Unknown Opcode ({:#x}).\n", cmd);
// A size of 0 signals the error to the caller.
return 0;
}
break;
}
return (u32)(data - dataStart);
}
// Applies one CP register write to `cpMem`. `subCmd` selects the register (its bits under
// CP_COMMAND_MASK pick the register group); writes to groups not listed below are ignored.
void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem)
{
  // Slot numbers encoded in the low bits of the register address.
  const auto vat_slot = subCmd & CP_VAT_MASK;
  const auto array_slot = subCmd & CP_ARRAY_MASK;

  switch (subCmd & CP_COMMAND_MASK)
  {
  case ARRAY_STRIDE:
    // Only the low byte of a stride is kept.
    cpMem.arrayStrides[array_slot] = value & 0xFF;
    break;

  case ARRAY_BASE:
    cpMem.arrayBases[array_slot] = value;
    break;

  case CP_VAT_REG_C:
    ASSERT(subCmd - CP_VAT_REG_C < CP_NUM_VAT_REG);
    cpMem.vtxAttr[vat_slot].g2.Hex = value;
    break;

  case CP_VAT_REG_B:
    ASSERT(subCmd - CP_VAT_REG_B < CP_NUM_VAT_REG);
    cpMem.vtxAttr[vat_slot].g1.Hex = value;
    break;

  case CP_VAT_REG_A:
    ASSERT(subCmd - CP_VAT_REG_A < CP_NUM_VAT_REG);
    cpMem.vtxAttr[vat_slot].g0.Hex = value;
    break;

  case VCD_HI:
    cpMem.vtxDesc.high.Hex = value;
    break;

  case VCD_LO:
    cpMem.vtxDesc.low.Hex = value;
    break;
  }
}
} // namespace FifoAnalyzer

View File

@ -1,33 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
// Shared FIFO command decoder used by both the recording and the playback analysis.
namespace FifoAnalyzer
{
// Selects which extra work AnalyzeCommand performs while decoding.
enum class DecodeMode
{
// Indexed XF loads and vertex data are additionally reported to FifoRecordAnalyzer.
Record,
// Commands are only decoded and measured.
Playback,
};
// Decodes one command at `data`; returns its size in bytes, or 0 for an unknown opcode.
u32 AnalyzeCommand(const u8* data, DecodeMode mode);
// The subset of CP register state that the analyzer tracks (written by LoadCPReg).
struct CPMemory
{
// Vertex descriptor (VCD_LO / VCD_HI writes).
TVtxDesc vtxDesc;
// Vertex attribute tables (CP_VAT_REG_A/B/C writes fill g0/g1/g2 of each entry).
std::array<VAT, CP_NUM_VAT_REG> vtxAttr;
// Array base values (ARRAY_BASE writes).
std::array<u32, CP_NUM_ARRAYS> arrayBases{};
// Array strides (ARRAY_STRIDE writes; only the low byte of the written value is stored).
std::array<u32, CP_NUM_ARRAYS> arrayStrides{};
};
// Applies a single CP register write to `cpMem`.
void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem);
// True when the most recently analyzed command was a primitive command; register loads reset it.
extern bool s_DrawingObject;
// CP state accumulated by AnalyzeCommand.
extern FifoAnalyzer::CPMemory s_CpMem;
} // namespace FifoAnalyzer

View File

@ -1,111 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
using namespace FifoAnalyzer;
// For debugging
#define LOG_FIFO_CMDS 0
// Record of one decoded command; only filled in when LOG_FIFO_CMDS is enabled.
struct CmdData
{
// Size of the command in bytes, as reported by the analyzer.
u32 size;
// Offset of the command within the frame's fifo data.
u32 offset;
// Pointer to the first byte of the command.
const u8* ptr;
};
// Splits every frame of `file` into objects (runs of primitive commands) and records, per frame,
// where each object starts and ends, the CP state at its start, and the frame's memory updates.
// If a command cannot be decoded, the object lists of the current frame are discarded and the
// analysis stops, leaving all later frames empty.
void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
                                         std::vector<AnalyzedFrameInfo>& frameInfo)
{
  // Seed the shared CP state from the register snapshot stored in the file.
  u32* regs = file->GetCPMem();
  FifoAnalyzer::LoadCPReg(VCD_LO, regs[VCD_LO], s_CpMem);
  FifoAnalyzer::LoadCPReg(VCD_HI, regs[VCD_HI], s_CpMem);
  for (u32 vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + vat, regs[CP_VAT_REG_A + vat], s_CpMem);
    FifoAnalyzer::LoadCPReg(CP_VAT_REG_B + vat, regs[CP_VAT_REG_B + vat], s_CpMem);
    FifoAnalyzer::LoadCPReg(CP_VAT_REG_C + vat, regs[CP_VAT_REG_C + vat], s_CpMem);
  }

  frameInfo.clear();
  frameInfo.resize(file->GetFrameCount());

  for (u32 frameNo = 0; frameNo < file->GetFrameCount(); ++frameNo)
  {
    const FifoFrameInfo& source = file->GetFrame(frameNo);
    AnalyzedFrameInfo& result = frameInfo[frameNo];

    s_DrawingObject = false;

    u32 cursor = 0;
    u32 pendingUpdate = 0;

#if LOG_FIFO_CMDS
    // Debugging
    std::vector<CmdData> history;
#endif

    while (cursor < source.fifoData.size())
    {
      // Carry over every memory update positioned at or before the current command.
      for (; pendingUpdate < source.memoryUpdates.size() &&
             source.memoryUpdates[pendingUpdate].fifoPosition <= cursor;
           ++pendingUpdate)
      {
        result.memoryUpdates.push_back(source.memoryUpdates[pendingUpdate]);
      }

      const bool drawingBefore = s_DrawingObject;
      const u32 length =
          FifoAnalyzer::AnalyzeCommand(&source.fifoData[cursor], DecodeMode::Playback);

#if LOG_FIFO_CMDS
      CmdData entry;
      entry.offset = cursor;
      entry.ptr = &source.fifoData[cursor];
      entry.size = length;
      history.push_back(entry);
#endif

      // A zero length means the command could not be decoded.
      if (length == 0)
      {
        // Clean up frame analysis
        result.objectStarts.clear();
        result.objectCPStates.clear();
        result.objectEnds.clear();
        return;
      }

      if (!drawingBefore && s_DrawingObject)
      {
        // First primitive command after non-drawing commands: a new object begins here.
        result.objectStarts.push_back(cursor);
        result.objectCPStates.push_back(s_CpMem);
      }
      else if (drawingBefore && !s_DrawingObject)
      {
        // First non-drawing command after primitives: the object ended just before it.
        result.objectEnds.push_back(cursor);
      }

      cursor += length;
    }

    // Close an object that was still open when the frame data ran out.
    if (result.objectEnds.size() < result.objectStarts.size())
      result.objectEnds.push_back(cursor);

    ASSERT(result.objectStarts.size() == result.objectCPStates.size());
    ASSERT(result.objectStarts.size() == result.objectEnds.size());
  }
}

View File

@ -1,25 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include <vector>
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
// Result of analyzing one frame: object boundaries as offsets into the frame's fifo data.
// The three object* vectors are parallel (the analyzer asserts they have equal sizes).
struct AnalyzedFrameInfo
{
// Start of the primitives for the object (after previous update commands)
std::vector<u32> objectStarts;
// Snapshot of the analyzer's CP state taken when the object's first primitive was decoded
std::vector<FifoAnalyzer::CPMemory> objectCPStates;
// End of the primitives for the object
std::vector<u32> objectEnds;
// Memory updates of the frame, copied in the order the analyzer encountered them
std::vector<MemoryUpdate> memoryUpdates;
};
namespace FifoPlaybackAnalyzer
{
void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frameInfo);
} // namespace FifoPlaybackAnalyzer

View File

@ -4,6 +4,7 @@
#include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoPlayer.h"
#include <algorithm> #include <algorithm>
#include <cstring>
#include <mutex> #include <mutex>
#include "Common/Assert.h" #include "Common/Assert.h"
@ -12,7 +13,6 @@
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h" #include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/HW/CPU.h" #include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h" #include "Core/HW/GPFifo.h"
@ -31,6 +31,136 @@
// TODO: Move texMem somewhere else so this isn't an issue. // TODO: Move texMem somewhere else so this isn't an issue.
#include "VideoCommon/TextureDecoder.h" #include "VideoCommon/TextureDecoder.h"
namespace
{
// Opcode decoder callback that classifies each decoded command so that AnalyzeFrames can split a
// frame into command runs, primitive data and EFB copies.
class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback
{
public:
// Fills `frame_info` with one AnalyzedFrameInfo per frame of `file`.
static void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frame_info);
// Starts from the CP register snapshot `cpmem`.
explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {}
// XF loads are ignored by this analyzer.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP register writes are tracked in m_cpmem.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value));
// Indexed loads are ignored by this analyzer.
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
// Display list calls are ignored by this analyzer.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {}
OPCODE_CALLBACK(void OnNop(u32 count));
// Unknown opcodes are ignored by this analyzer.
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size));
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Classification of the most recent command, recomputed by every OnCommand call and read by
// AnalyzeFrames.
bool m_start_of_primitives = false;
bool m_end_of_primitives = false;
bool m_efb_copy = false;
// Internal state, copied to above in OnCommand
// m_was_primitive remembers whether the previous non-NOP command was a primitive; the other
// three are set by OnPrimitiveCommand / OnBP / OnNop and cleared again at the end of OnCommand.
bool m_was_primitive = false;
bool m_is_primitive = false;
bool m_is_copy = false;
bool m_is_nop = false;
// CP state as of the most recently decoded command.
CPState m_cpmem;
};
// Splits every frame of `file` into consecutive parts (command runs, primitive data, EFB copies)
// and stores them in `frame_info`, one entry per frame.
//
// Each part covers the fifo data from its start offset up to its end offset; the next part begins
// at that same end offset. The CP state stored with a part is the state in effect for it.
void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
                                         std::vector<AnalyzedFrameInfo>& frame_info)
{
  FifoPlaybackAnalyzer analyzer(file->GetCPMem());
  frame_info.clear();
  frame_info.resize(file->GetFrameCount());

  for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++)
  {
    const FifoFrameInfo& frame = file->GetFrame(frame_no);
    AnalyzedFrameInfo& analyzed = frame_info[frame_no];

    u32 offset = 0;
    u32 part_start = 0;
    // CP state captured when the current run of primitives started.
    CPState cpmem;

    while (offset < frame.fifoData.size())
    {
      const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset],
                                                     u32(frame.fifoData.size()) - offset, analyzer);

      if (cmd_size == 0)
      {
        // Nothing was consumed, so `offset` would never advance and this loop would never end.
        // NOTE(review): presumably the decoder reports 0 when the remaining data does not hold a
        // complete command (e.g. a truncated recording) - confirm against RunCommand. The
        // analyzer's flags may be stale in that case, so they are not inspected; the assertions
        // after the loop report the incomplete frame.
        break;
      }

      if (analyzer.m_start_of_primitives)
      {
        // Start of primitive data for an object
        analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem);
        part_start = offset;
        // Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after
        // primitive data, and the first opcode might update cpmem
        std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState));
      }
      if (analyzer.m_end_of_primitives)
      {
        // End of primitive data for an object, and thus end of the object
        analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem);
        part_start = offset;
      }

      offset += cmd_size;

      if (analyzer.m_efb_copy)
      {
        // We increase the offset beforehand, so that the trigger EFB copy command is included.
        analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem);
        part_start = offset;
      }
    }

    // The frame should end with an EFB copy, so part_start should have been updated to the end.
    ASSERT(part_start == frame.fifoData.size());
    ASSERT(offset == frame.fifoData.size());
  }
}
// Notes when the current command is the BP write that triggers an EFB copy.
void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value)
{
  if (command != BPMEM_TRIGGER_EFB_COPY)
    return;

  m_is_copy = true;
}
// Marks the current command as a primitive (draw) command; the vertex data itself is not
// inspected. OnCommand consumes and clears the flag.
void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data)
{
m_is_primitive = true;
}
// Marks the current command as a NOP so that OnCommand leaves the primitive tracking untouched.
void FifoPlaybackAnalyzer::OnNop(u32 count)
{
m_is_nop = true;
}
void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size)
{
m_start_of_primitives = false;
m_end_of_primitives = false;
m_efb_copy = false;
if (!m_is_nop)
{
if (m_is_primitive && !m_was_primitive)
m_start_of_primitives = true;
else if (m_was_primitive && !m_is_primitive)
m_end_of_primitives = true;
else if (m_is_copy)
m_efb_copy = true;
m_was_primitive = m_is_primitive;
}
m_is_primitive = false;
m_is_copy = false;
m_is_nop = false;
}
} // namespace
bool IsPlayingBackFifologWithBrokenEFBCopies = false; bool IsPlayingBackFifologWithBrokenEFBCopies = false;
FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay} FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay}
@ -191,7 +321,7 @@ u32 FifoPlayer::GetMaxObjectCount() const
u32 result = 0; u32 result = 0;
for (auto& frame : m_FrameInfo) for (auto& frame : m_FrameInfo)
{ {
const u32 count = static_cast<u32>(frame.objectStarts.size()); const u32 count = frame.part_type_counts[FramePartType::PrimitiveData];
if (count > result) if (count > result)
result = count; result = count;
} }
@ -202,7 +332,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const
{ {
if (frame < m_FrameInfo.size()) if (frame < m_FrameInfo.size())
{ {
return static_cast<u32>(m_FrameInfo[frame].objectStarts.size()); return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData];
} }
return 0; return 0;
@ -262,55 +392,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
m_ElapsedCycles = 0; m_ElapsedCycles = 0;
m_FrameFifoSize = static_cast<u32>(frame.fifoData.size()); m_FrameFifoSize = static_cast<u32>(frame.fifoData.size());
// Determine start and end objects u32 memory_update = 0;
u32 numObjects = (u32)(info.objectStarts.size()); u32 object_num = 0;
u32 drawStart = std::min(numObjects, m_ObjectRangeStart);
u32 drawEnd = std::min(numObjects - 1, m_ObjectRangeEnd);
u32 position = 0; // Skip all memory updates if early memory updates are enabled, as we already wrote them
u32 memoryUpdate = 0;
// Skip memory updates during frame if true
if (m_EarlyMemoryUpdates) if (m_EarlyMemoryUpdates)
{ {
memoryUpdate = (u32)(frame.memoryUpdates.size()); memory_update = (u32)(frame.memoryUpdates.size());
} }
if (numObjects > 0) for (const FramePart& part : info.parts)
{ {
u32 objectNum = 0; bool show_part;
// Write fifo data skipping objects before the draw range if (part.m_type == FramePartType::PrimitiveData)
while (objectNum < drawStart)
{ {
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info); show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd;
object_num++;
position = info.objectEnds[objectNum]; }
++objectNum; else
{
// We always include commands and EFB copies, as commands from earlier objects still apply to
// later ones (games generally do not reconfigure everything for each object)
show_part = true;
} }
// Write objects in draw range if (show_part)
if (objectNum < numObjects && drawStart <= drawEnd) WriteFramePart(part, &memory_update, frame);
{
objectNum = drawEnd;
WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
}
// Write fifo data skipping objects after the draw range
while (objectNum < numObjects)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
}
} }
// Write data after the last object
WriteFramePart(position, static_cast<u32>(frame.fifoData.size()), memoryUpdate, frame, info);
FlushWGP(); FlushWGP();
// Sleep while the GPU is active // Sleep while the GPU is active
@ -321,36 +431,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
} }
} }
void FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update,
const FifoFrameInfo& frame, const AnalyzedFrameInfo& info) const FifoFrameInfo& frame)
{ {
const u8* const data = frame.fifoData.data(); const u8* const data = frame.fifoData.data();
while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd) u32 data_start = part.m_start;
{ const u32 data_end = part.m_end;
const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate];
if (memUpdate.fifoPosition < dataEnd) while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end)
{
const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update];
if (memUpdate.fifoPosition < data_end)
{ {
if (dataStart < memUpdate.fifoPosition) if (data_start < memUpdate.fifoPosition)
{ {
WriteFifo(data, dataStart, memUpdate.fifoPosition); WriteFifo(data, data_start, memUpdate.fifoPosition);
dataStart = memUpdate.fifoPosition; data_start = memUpdate.fifoPosition;
} }
WriteMemory(memUpdate); WriteMemory(memUpdate);
++nextMemUpdate; ++*next_mem_update;
} }
else else
{ {
WriteFifo(data, dataStart, dataEnd); WriteFifo(data, data_start, data_end);
dataStart = dataEnd; data_start = data_end;
} }
} }
if (dataStart < dataEnd) if (data_start < data_end)
WriteFifo(data, dataStart, dataEnd); WriteFifo(data, data_start, data_end);
} }
void FifoPlayer::WriteAllMemoryUpdates() void FifoPlayer::WriteAllMemoryUpdates()

View File

@ -5,16 +5,18 @@
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <set>
#include <string> #include <string>
#include <vector> #include <vector>
#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h" #include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/PowerPC/CPUCoreBase.h" #include "Core/PowerPC/CPUCoreBase.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/OpcodeDecoding.h"
class FifoDataFile; class FifoDataFile;
struct MemoryUpdate; struct MemoryUpdate;
struct AnalyzedFrameInfo;
namespace CPU namespace CPU
{ {
@ -43,16 +45,46 @@ enum class State;
// 8. The output of fifoplayer would be wrong. // 8. The output of fifoplayer would be wrong.
// To keep compatibility with old fifologs, we have this flag which signals texture cache to not // To keep compatibility with old fifologs, we have this flag which signals texture cache to not
// bother // bother hashing the memory and just assume the hash matched.
// hashing the memory and just assume the hash matched.
// At a later point proper efb copy support should be added to fiforecorder and this flag will // At a later point proper efb copy support should be added to fiforecorder and this flag will
// change // change based on the version of the .dff file, but until then it will always be true when a
// based on the version of the .dff file, but until then it will always be true when a fifolog is // fifolog is playing.
// playing.
// Shitty global to fix a shitty problem // Shitty global to fix a shitty problem
extern bool IsPlayingBackFifologWithBrokenEFBCopies; extern bool IsPlayingBackFifologWithBrokenEFBCopies;
// Kind of a contiguous section of a frame's fifo data, as classified by the playback analyzer.
enum class FramePartType
{
// A run of commands that precedes primitive data.
Commands,
// A run of primitive (draw) commands; each such part counts as one object.
PrimitiveData,
// A section ending with the command that triggers an EFB copy.
EFBCopy,
};
// One classified section of a frame's fifo data. All members are fixed at construction.
struct FramePart
{
constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
: m_type(type), m_start(start), m_end(end), m_cpmem(cpmem)
{
}
// What this section contains.
const FramePartType m_type;
// Offset into the frame's fifo data where the section begins.
const u32 m_start;
// Offset where the section stops; the analyzer starts the following part at this same offset.
const u32 m_end;
// Copy of the CP state that the analyzer associated with this section.
const CPState m_cpmem;
};
// Analysis result for one frame: its parts in fifo order plus a per-type tally.
struct AnalyzedFrameInfo
{
// Parts in the order they were added.
std::vector<FramePart> parts;
// Number of parts of each type; the PrimitiveData entry is used as the frame's object count.
Common::EnumMap<u32, FramePartType::EFBCopy> part_type_counts;
// Appends a part and bumps the counter for its type.
void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
{
parts.emplace_back(type, start, end, cpmem);
part_type_counts[type]++;
}
};
class FifoPlayer class FifoPlayer
{ {
public: public:
@ -102,14 +134,12 @@ public:
private: private:
class CPUCore; class CPUCore;
FifoPlayer(); FifoPlayer();
CPU::State AdvanceFrame(); CPU::State AdvanceFrame();
void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info); void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info);
void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame, void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame);
const AnalyzedFrameInfo& info);
void WriteAllMemoryUpdates(); void WriteAllMemoryUpdates();
void WriteMemory(const MemoryUpdate& memUpdate); void WriteMemory(const MemoryUpdate& memUpdate);

View File

@ -1,103 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include <algorithm>
#include "Common/MsgHandler.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
using namespace FifoAnalyzer;
void FifoRecordAnalyzer::Initialize(const u32* cpMem)
{
s_DrawingObject = false;
FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem);
FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem);
for (u32 i = 0; i < CP_NUM_VAT_REG; ++i)
FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem);
const u32* const bases_start = cpMem + ARRAY_BASE;
const u32* const bases_end = bases_start + s_CpMem.arrayBases.size();
std::copy(bases_start, bases_end, s_CpMem.arrayBases.begin());
const u32* const strides_start = cpMem + ARRAY_STRIDE;
const u32* const strides_end = strides_start + s_CpMem.arrayStrides.size();
std::copy(strides_start, strides_end, s_CpMem.arrayStrides.begin());
}
// Reports the memory read by an indexed XF load to the recorder. The element index comes from
// the upper 16 bits of `val`, and bits 12-15 plus one give the number of 4-byte words.
void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array)
{
  const int element = val >> 16;
  const int word_count = ((val >> 12) & 0xF) + 1;

  const u32 source = s_CpMem.arrayBases[array] + s_CpMem.arrayStrides[array] * element;

  FifoRecorder::GetInstance().UseMemory(source, word_count * 4, MemoryUpdate::XF_DATA);
}
void FifoRecordAnalyzer::WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize,
int numVertices)
{
// Skip if not indexed array
VertexComponentFormat arrayType;
if (arrayIndex == ARRAY_POSITION)
arrayType = s_CpMem.vtxDesc.low.Position;
else if (arrayIndex == ARRAY_NORMAL)
arrayType = s_CpMem.vtxDesc.low.Normal;
else if (arrayIndex >= ARRAY_COLOR0 && arrayIndex < ARRAY_COLOR0 + NUM_COLOR_ARRAYS)
arrayType = s_CpMem.vtxDesc.low.Color[arrayIndex - ARRAY_COLOR0];
else if (arrayIndex >= ARRAY_TEXCOORD0 && arrayIndex < ARRAY_TEXCOORD0 + NUM_TEXCOORD_ARRAYS)
arrayType = s_CpMem.vtxDesc.high.TexCoord[arrayIndex - ARRAY_TEXCOORD0];
else
{
PanicAlertFmt("Invalid arrayIndex {}", arrayIndex);
return;
}
if (!IsIndexed(arrayType))
return;
int maxIndex = 0;
// Determine min and max indices
if (arrayType == VertexComponentFormat::Index8)
{
for (int i = 0; i < numVertices; ++i)
{
int index = *vertexData;
vertexData += vertexSize;
// 0xff skips the vertex
if (index != 0xff)
{
if (index > maxIndex)
maxIndex = index;
}
}
}
else
{
for (int i = 0; i < numVertices; ++i)
{
int index = Common::swap16(vertexData);
vertexData += vertexSize;
// 0xffff skips the vertex
if (index != 0xffff)
{
if (index > maxIndex)
maxIndex = index;
}
}
}
u32 arrayStart = s_CpMem.arrayBases[arrayIndex];
u32 arraySize = s_CpMem.arrayStrides[arrayIndex] * (maxIndex + 1);
FifoRecorder::GetInstance().UseMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM);
}

View File

@ -1,15 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Common/CommonTypes.h"
namespace FifoRecordAnalyzer
{
// Must call this before analyzing Fifo commands with FifoAnalyzer::AnalyzeCommand()
void Initialize(const u32* cpMem);
void ProcessLoadIndexedXf(u32 val, int array);
void WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, int numVertices);
} // namespace FifoRecordAnalyzer

View File

@ -6,13 +6,168 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Common/Thread.h" #include "Common/Thread.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/XFStructs.h"
// Opcode decoder callback used while recording: it tracks CP register state and tells the owning
// FifoRecorder which memory the decoded commands read.
class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback
{
public:
// Starts with a default-constructed CP state.
explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {}
// Starts from the CP register snapshot `cpmem`.
explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem)
: m_owner(owner), m_cpmem(cpmem)
{
}
// XF loads are ignored by this analyzer.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP register writes are tracked in m_cpmem.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
// BP writes are ignored by this analyzer.
OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size));
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
// Display list calls are not followed here; one showing up is only logged.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
WARN_LOG_FMT(VIDEO,
"Unhandled display list call {:08x} {:08x}; should have been inlined earlier",
address, size);
}
OPCODE_CALLBACK(void OnNop(u32 count)) {}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
private:
// Reports the array referenced by one indexed vertex component (no-op for non-indexed ones).
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices,
const u8* vertex_data);
// Recorder that is told about memory use.
FifoRecorder* const m_owner;
// CP state as of the most recently decoded command.
CPState m_cpmem;
};
// Reports the memory read by an indexed load to the recorder: `size` 32-bit words located at the
// array's base plus `index` strides.
void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)
{
  const u32 base = m_cpmem.array_bases[array];
  const u32 stride = m_cpmem.array_strides[array];

  m_owner->UseMemory(base + stride * index, size * sizeof(u32), MemoryUpdate::XF_DATA);
}
// TODO: The following code is copied with modifications from VertexLoaderBase.
// Surely there's a better solution?
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
// Walks the components of the vertex layout selected by `vat` in the order position, normal,
// colors, texture coordinates, and hands each one to ProcessVertexComponent together with its
// byte offset inside a vertex, so that arrays referenced by indexed components get recorded.
//
// Each component is passed with its OWN descriptor entry: whether a component is indexed, and
// how wide its indices are, is configured per component and need not match the position.
void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive,
                                                          u8 vat, u32 vertex_size, u16 num_vertices,
                                                          const u8* vertex_data)
{
  const auto& vtx_desc = m_cpmem.vtx_desc;
  const auto& vtx_attr = m_cpmem.vtx_attr[vat];

  u32 offset = 0;

  // Each enabled matrix index occupies one byte at the front of the vertex.
  if (vtx_desc.low.PosMatIdx)
    offset++;
  for (auto texmtxidx : vtx_desc.low.TexMatIdx)
  {
    if (texmtxidx)
      offset++;
  }

  // Position
  const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
                                                      vtx_attr.g0.PosElements);
  ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size,
                         num_vertices, vertex_data);
  offset += pos_size;

  // Normal
  const u32 norm_size =
      VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
                                   vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
  ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Normal, offset, vertex_size, num_vertices,
                         vertex_data);
  offset += norm_size;

  // Colors
  for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
  {
    const u32 color_size =
        VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
    ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Color[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += color_size;
  }

  // Texture coordinates
  for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
  {
    const u32 tc_size = VertexLoader_TextCoord::GetSize(
        vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
    ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.high.TexCoord[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += tc_size;
  }

  // The component sizes must add up to the vertex size the decoder computed.
  ASSERT(offset == vertex_size);
}
// For an indexed vertex component, finds the largest index used by the given vertices and
// reports the referenced array, from its base through that index, to the owning FifoRecorder
// (via UseMemory, tagged as VERTEX_STREAM). Does nothing for components that are not indexed.
//
// array_index:      which CP array the component's indices refer to
// array_type:       how the component is supplied; Index8 means one index byte per vertex,
//                   any other indexed type is read as a byte-swapped 16-bit index
// component_offset: byte offset of the component's index inside one vertex
// vertex_size:      size in bytes of one vertex
// num_vertices:     number of vertices to scan
// vertex_data:      pointer to the first vertex
void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index,
                                                              VertexComponentFormat array_type,
                                                              u32 component_offset, u32 vertex_size,
                                                              u16 num_vertices,
                                                              const u8* vertex_data)
{
  // Nothing to save unless the component refers to an array through an index
  if (!IsIndexed(array_type))
    return;

  const bool is_index8 = array_type == VertexComponentFormat::Index8;
  // An all-ones index (0xff / 0xffff) skips the vertex, so it must not count towards the maximum
  const u16 skip_marker = is_index8 ? 0xff : 0xffff;

  u16 highest_index = 0;
  for (u16 vertex_nr = 0; vertex_nr < num_vertices; vertex_nr++)
  {
    const u8* const index_ptr = &vertex_data[vertex_nr * vertex_size + component_offset];
    const u16 current = is_index8 ? static_cast<u16>(*index_ptr) : Common::swap16(index_ptr);

    if (current != skip_marker && current > highest_index)
      highest_index = current;
  }

  const u32 array_start = m_cpmem.array_bases[array_index];
  const u32 array_size = m_cpmem.array_strides[array_index] * (highest_index + 1);
  m_owner->UseMemory(array_start, array_size, MemoryUpdate::VERTEX_STREAM);
}
static FifoRecorder instance; static FifoRecorder instance;
FifoRecorder::FifoRecorder() = default; FifoRecorder::FifoRecorder() = default;
@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size)
{ {
// Assumes data contains all information for the command // Assumes data contains all information for the command
// Calls FifoRecorder::UseMemory // Calls FifoRecorder::UseMemory
const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record); const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer);
// Make sure FifoPlayer's command analyzer agrees about the size of the command. // Make sure FifoPlayer's command analyzer agrees about the size of the command.
if (analyzed_size != size) if (analyzed_size != size)
@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32*
memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE); memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE);
} }
FifoRecordAnalyzer::Initialize(cpMem); m_record_analyzer = std::make_unique<FifoRecordAnalyzer>(this, cpMem);
} }
bool FifoRecorder::IsRecording() const bool FifoRecorder::IsRecording() const

View File

@ -8,6 +8,7 @@
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h" #include "Core/FifoPlayer/FifoDataFile.h"
class FifoRecorder class FifoRecorder
@ -47,6 +48,8 @@ public:
static FifoRecorder& GetInstance(); static FifoRecorder& GetInstance();
private: private:
class FifoRecordAnalyzer;
// Accessed from both GUI and video threads // Accessed from both GUI and video threads
std::recursive_mutex m_mutex; std::recursive_mutex m_mutex;
@ -65,6 +68,7 @@ private:
bool m_SkipFutureData = true; bool m_SkipFutureData = true;
bool m_FrameEnded = false; bool m_FrameEnded = false;
FifoFrameInfo m_CurrentFrame; FifoFrameInfo m_CurrentFrame;
std::unique_ptr<FifoRecordAnalyzer> m_record_analyzer;
std::vector<u8> m_FifoData; std::vector<u8> m_FifoData;
std::vector<u8> m_Ram; std::vector<u8> m_Ram;
std::vector<u8> m_ExRam; std::vector<u8> m_ExRam;

View File

@ -217,11 +217,8 @@
<ClInclude Include="Core\DSP\Jit\x64\DSPJitTables.h" /> <ClInclude Include="Core\DSP\Jit\x64\DSPJitTables.h" />
<ClInclude Include="Core\DSP\LabelMap.h" /> <ClInclude Include="Core\DSP\LabelMap.h" />
<ClInclude Include="Core\DSPEmulator.h" /> <ClInclude Include="Core\DSPEmulator.h" />
<ClInclude Include="Core\FifoPlayer\FifoAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoDataFile.h" /> <ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlaybackAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlayer.h" /> <ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecordAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecorder.h" /> <ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
<ClInclude Include="Core\FreeLookConfig.h" /> <ClInclude Include="Core\FreeLookConfig.h" />
<ClInclude Include="Core\FreeLookManager.h" /> <ClInclude Include="Core\FreeLookManager.h" />
@ -815,11 +812,8 @@
<ClCompile Include="Core\DSP\Jit\x64\DSPJitUtil.cpp" /> <ClCompile Include="Core\DSP\Jit\x64\DSPJitUtil.cpp" />
<ClCompile Include="Core\DSP\LabelMap.cpp" /> <ClCompile Include="Core\DSP\LabelMap.cpp" />
<ClCompile Include="Core\DSPEmulator.cpp" /> <ClCompile Include="Core\DSPEmulator.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" /> <ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlaybackAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" /> <ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecordAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" /> <ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
<ClCompile Include="Core\FreeLookConfig.cpp" /> <ClCompile Include="Core\FreeLookConfig.cpp" />
<ClCompile Include="Core\FreeLookManager.cpp" /> <ClCompile Include="Core\FreeLookManager.cpp" />

View File

@ -3,6 +3,8 @@
#include "DolphinQt/FIFO/FIFOAnalyzer.h" #include "DolphinQt/FIFO/FIFOAnalyzer.h"
#include <algorithm>
#include <QGroupBox> #include <QGroupBox>
#include <QHBoxLayout> #include <QHBoxLayout>
#include <QHeaderView> #include <QHeaderView>
@ -27,8 +29,12 @@
#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/XFStructs.h" #include "VideoCommon/XFStructs.h"
// Values range from 0 to number of frames - 1
constexpr int FRAME_ROLE = Qt::UserRole; constexpr int FRAME_ROLE = Qt::UserRole;
constexpr int OBJECT_ROLE = Qt::UserRole + 1; // Values range from 0 to number of parts - 1
constexpr int PART_START_ROLE = Qt::UserRole + 1;
// Values range from 1 to number of parts
constexpr int PART_END_ROLE = Qt::UserRole + 2;
FIFOAnalyzer::FIFOAnalyzer() FIFOAnalyzer::FIFOAnalyzer()
{ {
@ -144,43 +150,175 @@ void FIFOAnalyzer::UpdateTree()
auto* file = FifoPlayer::GetInstance().GetFile(); auto* file = FifoPlayer::GetInstance().GetFile();
const u32 frame_count = file->GetFrameCount(); const u32 frame_count = file->GetFrameCount();
for (u32 frame = 0; frame < frame_count; frame++) for (u32 frame = 0; frame < frame_count; frame++)
{ {
auto* frame_item = new QTreeWidgetItem({tr("Frame %1").arg(frame)}); auto* frame_item = new QTreeWidgetItem({tr("Frame %1").arg(frame)});
recording_item->addChild(frame_item); recording_item->addChild(frame_item);
const u32 object_count = FifoPlayer::GetInstance().GetFrameObjectCount(frame); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame);
for (u32 object = 0; object < object_count; object++) ASSERT(frame_info.parts.size() != 0);
Common::EnumMap<u32, FramePartType::EFBCopy> part_counts;
u32 part_start = 0;
for (u32 part_nr = 0; part_nr < frame_info.parts.size(); part_nr++)
{ {
auto* object_item = new QTreeWidgetItem({tr("Object %1").arg(object)}); const auto& part = frame_info.parts[part_nr];
frame_item->addChild(object_item); const u32 part_type_nr = part_counts[part.m_type];
part_counts[part.m_type]++;
object_item->setData(0, FRAME_ROLE, frame); QTreeWidgetItem* object_item = nullptr;
object_item->setData(0, OBJECT_ROLE, object); if (part.m_type == FramePartType::PrimitiveData)
object_item = new QTreeWidgetItem({tr("Object %1").arg(part_type_nr)});
else if (part.m_type == FramePartType::EFBCopy)
object_item = new QTreeWidgetItem({tr("EFB copy %1").arg(part_type_nr)});
// We don't create dedicated labels for FramePartType::Command;
// those are grouped with the primitive
if (object_item != nullptr)
{
frame_item->addChild(object_item);
object_item->setData(0, FRAME_ROLE, frame);
object_item->setData(0, PART_START_ROLE, part_start);
object_item->setData(0, PART_END_ROLE, part_nr);
part_start = part_nr + 1;
}
} }
// We shouldn't end on a Command (it should end with an EFB copy)
ASSERT(part_start == frame_info.parts.size());
// The counts we computed should match the frame's counts
ASSERT(std::equal(frame_info.part_type_counts.begin(), frame_info.part_type_counts.end(),
part_counts.begin()));
} }
} }
static std::string GetPrimitiveName(u8 cmd) namespace
{ {
if ((cmd & 0xC0) != 0x80) class DetailCallback : public OpcodeDecoder::Callback
{
public:
explicit DetailCallback(CPState cpmem) : m_cpmem(cpmem) {}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{ {
PanicAlertFmt("Not a primitive command: {:#04x}", cmd); // Note: No need to update m_cpmem as it already has the final value for this object
return "";
const auto [name, desc] = GetCPRegInfo(command, value);
ASSERT(!name.empty());
text = QStringLiteral("CP %1 %2 %3")
.arg(command, 2, 16, QLatin1Char('0'))
.arg(value, 8, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
} }
const u8 vat = cmd & OpcodeDecoder::GX_VAT_MASK; // Vertex loader index (0 - 7)
const u8 primitive = OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
(cmd & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT; {
static constexpr std::array<const char*, 8> names = { const auto [name, desc] = GetXFTransferInfo(address, count, data);
"GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)", ASSERT(!name.empty());
"GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP",
"GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES", const u32 command = address | (count << 16);
"GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS",
}; text = QStringLiteral("XF %1 ").arg(command, 8, 16, QLatin1Char('0'));
return fmt::format("{} VAT {}", names[primitive], vat);
} for (u8 i = 0; i < count; i++)
{
const u32 value = Common::swap32(&data[i * 4]);
text += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0'));
}
text += QStringLiteral(" ") + QString::fromStdString(name);
}
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
const auto [name, desc] = GetBPRegInfo(command, value);
ASSERT(!name.empty());
text = QStringLiteral("BP %1 %2 %3")
.arg(command, 2, 16, QLatin1Char('0'))
.arg(value, 6, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size);
text = QStringLiteral("LOAD INDX %1 %2")
.arg(QString::fromStdString(fmt::to_string(array)))
.arg(QString::fromStdString(desc));
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
const auto name = fmt::to_string(primitive);
// Note that vertex_count is allowed to be 0, with no special treatment
// (another command just comes right after the current command, with no vertices in between)
const u32 object_prim_size = num_vertices * vertex_size;
const u8 opcode =
0x80 | (static_cast<u8>(primitive) << OpcodeDecoder::GX_PRIMITIVE_SHIFT) | vat;
text = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes")
.arg(QString::fromStdString(name))
.arg(opcode, 2, 16, QLatin1Char('0'))
.arg(num_vertices)
.arg(vertex_size)
.arg(object_prim_size);
// It's not really useful to have a massive unreadable hex string for the object primitives.
// Put it in the description instead.
// #define INCLUDE_HEX_IN_PRIMITIVES
#ifdef INCLUDE_HEX_IN_PRIMITIVES
text += QStringLiteral(" ");
for (u32 i = 0; i < object_prim_size; i++)
{
text += QStringLiteral("%1").arg(vertex_data[i], 2, 16, QLatin1Char('0'));
}
#endif
}
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
text = QObject::tr("Call display list at %1 with size %2")
.arg(address, 8, 16, QLatin1Char('0'))
.arg(size, 8, 16, QLatin1Char('0'));
}
// Builds the one-line summary for a NOP; the count is only shown when it is greater than 1.
OPCODE_CALLBACK(void OnNop(u32 count))
{
  const bool show_count = count > 1;
  text = show_count ? QStringLiteral("NOP (%1x)").arg(count) : QStringLiteral("NOP");
}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
using OpcodeDecoder::Opcode;
if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS)
text = QStringLiteral("GX_CMD_UNKNOWN_METRICS");
else if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_INVL_VC)
text = QStringLiteral("GX_CMD_INVL_VC");
else
text = QStringLiteral("Unknown opcode %1").arg(opcode, 2, 16);
}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
QString text;
CPState m_cpmem;
};
} // namespace
void FIFOAnalyzer::UpdateDetails() void FIFOAnalyzer::UpdateDetails()
{ {
@ -200,205 +338,40 @@ void FIFOAnalyzer::UpdateDetails()
const auto items = m_tree_widget->selectedItems(); const auto items = m_tree_widget->selectedItems();
if (items.isEmpty() || items[0]->data(0, OBJECT_ROLE).isNull()) if (items.isEmpty() || items[0]->data(0, PART_START_ROLE).isNull())
return; return;
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const auto& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const auto& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); const auto& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
// Note that frame_info.objectStarts[object_nr] is the start of the primitive data, const u32 object_start = frame_info.parts[start_part_nr].m_start;
// but we want to start with the register updates which happen before that. const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); const u32 object_size = object_end - object_start;
const u32 object_size = frame_info.objectEnds[object_nr] - object_start;
const u8* const object = &fifo_frame.fifoData[object_start];
u32 object_offset = 0; u32 object_offset = 0;
// NOTE: object_info.m_cpmem is the state of cpmem _after_ all of the commands in this object.
// However, it doesn't matter that it doesn't match the start, since it will match by the time
// primitives are reached.
auto callback = DetailCallback(frame_info.parts[end_part_nr].m_cpmem);
while (object_offset < object_size) while (object_offset < object_size)
{ {
QString new_label;
const u32 start_offset = object_offset; const u32 start_offset = object_offset;
m_object_data_offsets.push_back(start_offset); m_object_data_offsets.push_back(start_offset);
const u8 command = object[object_offset++]; object_offset += OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + start_offset],
switch (command) object_size - start_offset, callback);
{
case OpcodeDecoder::GX_NOP:
if (object[object_offset] == OpcodeDecoder::GX_NOP)
{
u32 nop_count = 2;
while (object[++object_offset] == OpcodeDecoder::GX_NOP)
nop_count++;
new_label = QStringLiteral("NOP (%1x)").arg(nop_count); QString new_label =
} QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) +
else callback.text;
{
new_label = QStringLiteral("NOP");
}
break;
case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS:
new_label = QStringLiteral("GX_CMD_UNKNOWN_METRICS");
break;
case OpcodeDecoder::GX_CMD_INVL_VC:
new_label = QStringLiteral("GX_CMD_INVL_VC");
break;
case OpcodeDecoder::GX_LOAD_CP_REG:
{
const u8 cmd2 = object[object_offset++];
const u32 value = Common::swap32(&object[object_offset]);
object_offset += 4;
const auto [name, desc] = GetCPRegInfo(cmd2, value);
ASSERT(!name.empty());
new_label = QStringLiteral("CP %1 %2 %3")
.arg(cmd2, 2, 16, QLatin1Char('0'))
.arg(value, 8, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
break;
case OpcodeDecoder::GX_LOAD_XF_REG:
{
const auto [name, desc] = GetXFTransferInfo(&object[object_offset]);
const u32 cmd2 = Common::swap32(&object[object_offset]);
object_offset += 4;
ASSERT(!name.empty());
const u8 stream_size = ((cmd2 >> 16) & 15) + 1;
new_label = QStringLiteral("XF %1 ").arg(cmd2, 8, 16, QLatin1Char('0'));
for (u8 i = 0; i < stream_size; i++)
{
const u32 value = Common::swap32(&object[object_offset]);
object_offset += 4;
new_label += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0'));
}
new_label += QStringLiteral(" ") + QString::fromStdString(name);
}
break;
case OpcodeDecoder::GX_LOAD_INDX_A:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_B:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_C:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_D:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX D %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_CMD_CALL_DL:
// The recorder should have expanded display lists into the fifo stream and skipped the
// call to start them
// That is done to make it easier to track where memory is updated
ASSERT(false);
object_offset += 8;
new_label = QStringLiteral("CALL DL");
break;
case OpcodeDecoder::GX_LOAD_BP_REG:
{
const u8 cmd2 = object[object_offset++];
const u32 cmddata = Common::swap24(&object[object_offset]);
object_offset += 3;
const auto [name, desc] = GetBPRegInfo(cmd2, cmddata);
ASSERT(!name.empty());
new_label = QStringLiteral("BP %1 %2 %3")
.arg(cmd2, 2, 16, QLatin1Char('0'))
.arg(cmddata, 6, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
break;
default:
if ((command & 0xC0) == 0x80)
{
// Object primitive data
const u8 vat = command & OpcodeDecoder::GX_VAT_MASK;
const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc;
const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat];
const auto name = GetPrimitiveName(command);
const u16 vertex_count = Common::swap16(&object[object_offset]);
object_offset += 2;
const u32 vertex_size = VertexLoaderBase::GetVertexSize(vtx_desc, vtx_attr);
// Note that vertex_count is allowed to be 0, with no special treatment
// (another command just comes right after the current command, with no vertices in between)
const u32 object_prim_size = vertex_count * vertex_size;
new_label = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes")
.arg(QString::fromStdString(name))
.arg(command, 2, 16, QLatin1Char('0'))
.arg(vertex_count)
.arg(vertex_size)
.arg(object_prim_size);
// It's not really useful to have a massive unreadable hex string for the object primitives.
// Put it in the description instead.
// #define INCLUDE_HEX_IN_PRIMITIVES
#ifdef INCLUDE_HEX_IN_PRIMITIVES
new_label += QStringLiteral(" ");
for (u32 i = 0; i < object_prim_size; i++)
{
new_label += QStringLiteral("%1").arg(object[object_offset++], 2, 16, QLatin1Char('0'));
}
#else
object_offset += object_prim_size;
#endif
}
else
{
new_label = QStringLiteral("Unknown opcode %1").arg(command, 2, 16);
}
break;
}
new_label = QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) +
new_label;
m_detail_list->addItem(new_label); m_detail_list->addItem(new_label);
} }
ASSERT(object_offset == object_size);
// Needed to ensure the description updates when changing objects // Needed to ensure the description updates when changing objects
m_detail_list->setCurrentRow(0); m_detail_list->setCurrentRow(0);
} }
@ -413,12 +386,15 @@ void FIFOAnalyzer::BeginSearch()
const auto items = m_tree_widget->selectedItems(); const auto items = m_tree_widget->selectedItems();
if (items.isEmpty() || items[0]->data(0, FRAME_ROLE).isNull() || if (items.isEmpty() || items[0]->data(0, FRAME_ROLE).isNull() ||
items[0]->data(0, OBJECT_ROLE).isNull()) items[0]->data(0, PART_START_ROLE).isNull())
{ {
m_search_label->setText(tr("Invalid search parameters (no object selected)")); m_search_label->setText(tr("Invalid search parameters (no object selected)"));
return; return;
} }
// Having PART_START_ROLE indicates that this is valid
const int object_idx = items[0]->parent()->indexOfChild(items[0]);
// TODO: Remove even string length limit // TODO: Remove even string length limit
if (search_str.length() % 2) if (search_str.length() % 2)
{ {
@ -449,13 +425,15 @@ void FIFOAnalyzer::BeginSearch()
m_search_results.clear(); m_search_results.clear();
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); const u32 object_start = frame_info.parts[start_part_nr].m_start;
const u32 object_size = frame_info.objectEnds[object_nr] - object_start; const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_size = object_end - object_start;
const u8* const object = &fifo_frame.fifoData[object_start]; const u8* const object = &fifo_frame.fifoData[object_start];
@ -474,7 +452,7 @@ void FIFOAnalyzer::BeginSearch()
{ {
if (std::equal(search_val.begin(), search_val.end(), ptr)) if (std::equal(search_val.begin(), search_val.end(), ptr))
{ {
m_search_results.emplace_back(frame_nr, object_nr, cmd_nr); m_search_results.emplace_back(frame_nr, object_idx, cmd_nr);
break; break;
} }
} }
@ -528,7 +506,7 @@ void FIFOAnalyzer::ShowSearchResult(size_t index)
const auto& result = m_search_results[index]; const auto& result = m_search_results[index];
QTreeWidgetItem* object_item = QTreeWidgetItem* object_item =
m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object); m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object_idx);
m_tree_widget->setCurrentItem(object_item); m_tree_widget->setCurrentItem(object_item);
m_detail_list->setCurrentRow(result.m_cmd); m_detail_list->setCurrentRow(result.m_cmd);
@ -537,6 +515,225 @@ void FIFOAnalyzer::ShowSearchResult(size_t index)
m_search_previous->setEnabled(index > 0); m_search_previous->setEnabled(index > 0);
} }
namespace
{
// TODO: Not sure whether we should bother translating the descriptions
// Opcode decoder callback that produces a multi-line, human-readable description of a single
// command. Each handler overwrites `text` with the description of the command it was invoked
// for; handlers that have nothing to say set it to "No description available".
class DescriptionCallback : public OpcodeDecoder::Callback
{
public:
// Stores a copy of the given CP state; the vertex descriptor and attribute tables in it are
// used to decode vertex data in OnPrimitiveCommand.
explicit DescriptionCallback(const CPState& cpmem) : m_cpmem(cpmem) {}
// BP register write: "BP register <name>", then the description from GetBPRegInfo (or a
// placeholder when that description is empty) on the next line.
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
const auto [name, desc] = GetBPRegInfo(command, value);
ASSERT(!name.empty());
text = QObject::tr("BP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
// CP register write: same layout as OnBP, using GetCPRegInfo.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
// Note: No need to update m_cpmem as it already has the final value for this object
const auto [name, desc] = GetCPRegInfo(command, value);
ASSERT(!name.empty());
text = QObject::tr("CP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
// XF transfer: same layout as OnBP, using GetXFTransferInfo.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
{
const auto [name, desc] = GetXFTransferInfo(address, count, data);
ASSERT(!name.empty());
text = QObject::tr("XF register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
// Indexed load: three lines - the first string from GetXFIndexedLoadInfo, a hint about what
// the array is usually used for (left empty for arrays other than XF_A..XF_D), and the second
// string from GetXFIndexedLoadInfo.
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size);
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
switch (array)
{
case CPArray::XF_A:
text += QObject::tr("Usually used for position matrices");
break;
case CPArray::XF_B:
// i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal"
// does not have its usual meaning here, but rather the meaning of "perpendicular to a
// surface".
text += QObject::tr("Usually used for normal matrices");
break;
case CPArray::XF_C:
// i18n: Tex coord is short for texture coordinate
text += QObject::tr("Usually used for tex coord matrices");
break;
case CPArray::XF_D:
text += QObject::tr("Usually used for light objects");
break;
default:
break;
}
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
// Primitive command: a header line naming the primitive and VAT, followed by one line per
// vertex with the vertex's bytes in hex, grouped by component. The vertex layout is taken from
// m_cpmem.vtx_desc and m_cpmem.vtx_attr[vat].
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
const auto name = fmt::format("{} VAT {}", primitive, vat);
// i18n: In this context, a primitive means a point, line, triangle or rectangle.
// Do not translate the word primitive as if it was an adjective.
text = QObject::tr("Primitive %1").arg(QString::fromStdString(name));
text += QLatin1Char{'\n'};
const auto& vtx_desc = m_cpmem.vtx_desc;
const auto& vtx_attr = m_cpmem.vtx_attr[vat];
// Byte offset into vertex_data of the next byte to print. Shared by both lambdas below, which
// advance it as they consume data, so the order in which they are called must follow the
// order of the components inside a vertex.
u32 i = 0;
// Prints one component made of typed elements. cformat says how the component is supplied:
// absent components print nothing; for Index8/Index16 the element type is overridden to
// UByte/UShort (so the index itself is printed) and indexed_count elements are printed;
// otherwise non_indexed_count elements of the given format are printed. Float elements are
// additionally shown decoded in parentheses.
const auto process_component = [&](VertexComponentFormat cformat, ComponentFormat format,
u32 non_indexed_count, u32 indexed_count = 1) {
u32 count;
if (cformat == VertexComponentFormat::NotPresent)
return;
else if (cformat == VertexComponentFormat::Index8)
{
format = ComponentFormat::UByte;
count = indexed_count;
}
else if (cformat == VertexComponentFormat::Index16)
{
format = ComponentFormat::UShort;
count = indexed_count;
}
else
{
count = non_indexed_count;
}
const u32 component_size = GetElementSize(format);
for (u32 j = 0; j < count; j++)
{
// Raw bytes of this element, two hex digits each
for (u32 component_off = 0; component_off < component_size; component_off++)
{
text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0'));
}
if (format == ComponentFormat::Float)
{
// The data is byte-swapped before being reinterpreted as a float
const float value = Common::BitCast<float>(Common::swap32(&vertex_data[i]));
text += QStringLiteral(" (%1)").arg(value);
}
i += component_size;
text += QLatin1Char{' '};
}
text += QLatin1Char{' '};
};
// Prints `size` raw bytes as hex, with no per-element interpretation
const auto process_simple_component = [&](u32 size) {
for (u32 component_off = 0; component_off < size; component_off++)
{
text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0'));
}
i += size;
text += QLatin1Char{' '};
text += QLatin1Char{' '};
};
for (u32 vertex_num = 0; vertex_num < num_vertices; vertex_num++)
{
// The cursor must be exactly at the start of this vertex, i.e. the components printed for
// the previous vertices must add up to vertex_size each
ASSERT(i == vertex_num * vertex_size);
text += QLatin1Char{'\n'};
// Enabled matrix indices are printed as one byte each
if (vtx_desc.low.PosMatIdx)
process_simple_component(1);
for (auto texmtxidx : vtx_desc.low.TexMatIdx)
{
if (texmtxidx)
process_simple_component(1);
}
process_component(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
vtx_attr.g0.PosElements == CoordComponentCount::XY ? 2 : 3);
// TODO: Is this calculation correct?
const u32 normal_component_count =
vtx_desc.low.Normal == VertexComponentFormat::Direct ? 3 : 1;
const u32 normal_elements = vtx_attr.g0.NormalElements == NormalComponentCount::NBT ? 3 : 1;
process_component(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
normal_component_count * normal_elements,
vtx_attr.g0.NormalIndex3 ? normal_elements : 1);
for (u32 c = 0; c < vtx_desc.low.Color.Size(); c++)
{
// Number of bytes printed for a direct color of each format
static constexpr Common::EnumMap<u32, ColorFormat::RGBA8888> component_sizes = {
2, // RGB565
3, // RGB888
4, // RGB888x
2, // RGBA4444
3, // RGBA6666
4, // RGBA8888
};
// Colors are printed as raw bytes. Values without a case here (e.g. NotPresent) print
// nothing.
switch (vtx_desc.low.Color[c])
{
case VertexComponentFormat::Index8:
process_simple_component(1);
break;
case VertexComponentFormat::Index16:
process_simple_component(2);
break;
case VertexComponentFormat::Direct:
process_simple_component(component_sizes[vtx_attr.GetColorFormat(c)]);
break;
}
}
for (u32 t = 0; t < vtx_desc.high.TexCoord.Size(); t++)
{
process_component(vtx_desc.high.TexCoord[t], vtx_attr.GetTexFormat(t),
vtx_attr.GetTexElements(t) == TexComponentCount::ST ? 2 : 1);
}
}
}
// The remaining commands have no detailed description.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
text = QObject::tr("No description available");
}
OPCODE_CALLBACK(void OnNop(u32 count)) { text = QObject::tr("No description available"); }
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
text = QObject::tr("No description available");
}
// Intentionally does nothing; `text` is left as set by the specific handler.
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
// Gives access to the CP state held by this callback.
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Description of the most recently handled command
QString text;
// Copy of the CP state passed to the constructor
CPState m_cpmem;
};
} // namespace
void FIFOAnalyzer::UpdateDescription() void FIFOAnalyzer::UpdateDescription()
{ {
m_entry_detail_browser->clear(); m_entry_detail_browser->clear();
@ -549,148 +746,24 @@ void FIFOAnalyzer::UpdateDescription()
if (items.isEmpty() || m_object_data_offsets.empty()) if (items.isEmpty() || m_object_data_offsets.empty())
return; return;
if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, OBJECT_ROLE).isNull()) if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, PART_START_ROLE).isNull())
return; return;
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const u32 entry_nr = m_detail_list->currentRow(); const u32 entry_nr = m_detail_list->currentRow();
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); const u32 object_start = frame_info.parts[start_part_nr].m_start;
const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_size = object_end - object_start;
const u32 entry_start = m_object_data_offsets[entry_nr]; const u32 entry_start = m_object_data_offsets[entry_nr];
const u8* cmddata = &fifo_frame.fifoData[object_start + entry_start]; auto callback = DescriptionCallback(frame_info.parts[end_part_nr].m_cpmem);
OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + entry_start],
// TODO: Not sure whether we should bother translating the descriptions object_size - entry_start, callback);
m_entry_detail_browser->setText(callback.text);
QString text;
if (*cmddata == OpcodeDecoder::GX_LOAD_BP_REG)
{
const u8 cmd = *(cmddata + 1);
const u32 value = Common::swap24(cmddata + 2);
const auto [name, desc] = GetBPRegInfo(cmd, value);
ASSERT(!name.empty());
text = tr("BP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_CP_REG)
{
const u8 cmd = *(cmddata + 1);
const u32 value = Common::swap32(cmddata + 2);
const auto [name, desc] = GetCPRegInfo(cmd, value);
ASSERT(!name.empty());
text = tr("CP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_XF_REG)
{
const auto [name, desc] = GetXFTransferInfo(cmddata + 1);
ASSERT(!name.empty());
text = tr("XF register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_A)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
text += tr("Usually used for position matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_B)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
// i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal"
// does not have its usual meaning here, but rather the meaning of "perpendicular to a surface".
text += tr("Usually used for normal matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_C)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
// i18n: Tex coord is short for texture coordinate
text += tr("Usually used for tex coord matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_D)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
text += tr("Usually used for light objects");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if ((*cmddata & 0xC0) == 0x80)
{
const u8 vat = *cmddata & OpcodeDecoder::GX_VAT_MASK;
const QString name = QString::fromStdString(GetPrimitiveName(*cmddata));
const u16 vertex_count = Common::swap16(cmddata + 1);
// i18n: In this context, a primitive means a point, line, triangle or rectangle.
// Do not translate the word primitive as if it was an adjective.
text = tr("Primitive %1").arg(name);
text += QLatin1Char{'\n'};
const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc;
const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat];
const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr);
u32 i = 3;
for (u32 vertex_num = 0; vertex_num < vertex_count; vertex_num++)
{
text += QLatin1Char{'\n'};
for (u32 comp_size : component_sizes)
{
for (u32 comp_off = 0; comp_off < comp_size; comp_off++)
{
text += QStringLiteral("%1").arg(cmddata[i++], 2, 16, QLatin1Char('0'));
}
text += QLatin1Char{' '};
}
}
}
else
{
text = tr("No description available");
}
m_entry_detail_browser->setText(text);
} }

View File

@ -58,15 +58,19 @@ private:
struct SearchResult struct SearchResult
{ {
constexpr SearchResult(u32 frame, u32 object, u32 cmd) constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd)
: m_frame(frame), m_object(object), m_cmd(cmd) : m_frame(frame), m_object_idx(object_idx), m_cmd(cmd)
{ {
} }
const u32 m_frame; const u32 m_frame;
const u32 m_object; // Index in tree view. Does not correspond with object numbers or part numbers.
const u32 m_object_idx;
const u32 m_cmd; const u32 m_cmd;
}; };
// Offsets from the start of the first part in an object for each command within the currently
// selected object.
std::vector<int> m_object_data_offsets; std::vector<int> m_object_data_offsets;
std::vector<SearchResult> m_search_results; std::vector<SearchResult> m_search_results;
}; };

View File

@ -21,7 +21,6 @@
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/FifoPlayer/FifoDataFile.h" #include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h" #include "Core/FifoPlayer/FifoRecorder.h"
@ -151,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets()
layout->addWidget(recording_group); layout->addWidget(recording_group);
layout->addWidget(m_button_box); layout->addWidget(m_button_box);
QWidget* main_widget = new QWidget(this); m_main_widget = new QWidget(this);
main_widget->setLayout(layout); m_main_widget->setLayout(layout);
auto* tab_widget = new QTabWidget(this); m_tab_widget = new QTabWidget(this);
m_analyzer = new FIFOAnalyzer; m_analyzer = new FIFOAnalyzer;
tab_widget->addTab(main_widget, tr("Play / Record")); m_tab_widget->addTab(m_main_widget, tr("Play / Record"));
tab_widget->addTab(m_analyzer, tr("Analyze")); m_tab_widget->addTab(m_analyzer, tr("Analyze"));
auto* tab_layout = new QVBoxLayout; auto* tab_layout = new QVBoxLayout;
tab_layout->addWidget(tab_widget); tab_layout->addWidget(m_tab_widget);
setLayout(tab_layout); setLayout(tab_layout);
} }
@ -251,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped()
StopRecording(); StopRecording();
UpdateControls(); UpdateControls();
// When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful
m_tab_widget->setCurrentWidget(m_main_widget);
m_analyzer->Update(); m_analyzer->Update();
} }

View File

@ -12,6 +12,7 @@ class QDialogButtonBox;
class QLabel; class QLabel;
class QPushButton; class QPushButton;
class QSpinBox; class QSpinBox;
class QTabWidget;
class FIFOAnalyzer; class FIFOAnalyzer;
class FIFOPlayerWindow : public QWidget class FIFOPlayerWindow : public QWidget
@ -64,6 +65,9 @@ private:
QCheckBox* m_early_memory_updates; QCheckBox* m_early_memory_updates;
QDialogButtonBox* m_button_box; QDialogButtonBox* m_button_box;
QWidget* m_main_widget;
QTabWidget* m_tab_widget;
FIFOAnalyzer* m_analyzer; FIFOAnalyzer* m_analyzer;
Core::State m_emu_state = Core::State::Uninitialized; Core::State m_emu_state = Core::State::Uninitialized;
}; };

View File

@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool)
target_link_libraries(dolphin-tool target_link_libraries(dolphin-tool
PRIVATE PRIVATE
core
discio discio
videocommon videocommon
cpp-optparse cpp-optparse

View File

@ -3,6 +3,8 @@
#include <array> #include <array>
#include "Common/EnumMap.h"
#include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DRender.h" #include "VideoBackends/D3D/D3DRender.h"
#include "VideoBackends/D3D/D3DState.h" #include "VideoBackends/D3D/D3DState.h"
@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<D3DVertexFormat>(vtx_decl); return std::make_unique<D3DVertexFormat>(vtx_decl);
} }
static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = { DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer)
// float formats
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
// integer formats
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
};
DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
{ {
DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer]; using FormatMap = Common::EnumMap<DXGI_FORMAT, ComponentFormat::Float>;
static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper
static constexpr std::array<FormatMap, 4> d3d_float_format_lookup = {
f({
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
}),
};
static constexpr std::array<FormatMap, 4> d3d_integer_format_lookup = {
f({
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
};
DXGI_FORMAT retval =
integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t];
if (retval == DXGI_FORMAT_UNKNOWN) if (retval == DXGI_FORMAT_UNKNOWN)
{ {
PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer); PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer);

View File

@ -4,39 +4,43 @@
#include "VideoBackends/D3D12/DX12VertexFormat.h" #include "VideoBackends/D3D12/DX12VertexFormat.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/EnumMap.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VertexShaderGen.h"
namespace DX12 namespace DX12
{ {
static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer) static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer)
{ {
using ComponentArray = std::array<DXGI_FORMAT, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper
// NOTE: 3-component formats are not valid. // NOTE: 3-component formats are not valid.
static const DXGI_FORMAT float_type_lookup[][4] = { static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT DXGI_FORMAT_R16G16B16A16_SNORM}), // Short
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
}; };
static const DXGI_FORMAT integer_type_lookup[][4] = { static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE DXGI_FORMAT_R8G8B8A8_UINT}), // UByte
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE DXGI_FORMAT_R8G8B8A8_SINT}), // Byte
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT DXGI_FORMAT_R16G16B16A16_UINT}), // UShort
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT DXGI_FORMAT_R16G16B16A16_SINT}), // Short
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
}; };
ASSERT(components > 0 && components <= 4); ASSERT(components > 0 && components <= 4);

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/GL/GLUtil.h" #include "Common/GL/GLUtil.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<GLVertexFormat>(vtx_decl); return std::make_unique<GLVertexFormat>(vtx_decl);
} }
static inline GLuint VarToGL(VarType t) static inline GLuint VarToGL(ComponentFormat t)
{ {
static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, static constexpr Common::EnumMap<GLuint, ComponentFormat::Float> lookup = {
GL_FLOAT}; GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT,
};
return lookup[t]; return lookup[t];
} }

View File

@ -36,20 +36,21 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
{ {
DebugUtil::OnObjectBegin(); DebugUtil::OnObjectBegin();
u8 primitiveType = 0; using OpcodeDecoder::Primitive;
Primitive primitive_type = Primitive::GX_DRAW_QUADS;
switch (m_current_primitive_type) switch (m_current_primitive_type)
{ {
case PrimitiveType::Points: case PrimitiveType::Points:
primitiveType = OpcodeDecoder::GX_DRAW_POINTS; primitive_type = Primitive::GX_DRAW_POINTS;
break; break;
case PrimitiveType::Lines: case PrimitiveType::Lines:
primitiveType = OpcodeDecoder::GX_DRAW_LINES; primitive_type = Primitive::GX_DRAW_LINES;
break; break;
case PrimitiveType::Triangles: case PrimitiveType::Triangles:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES; primitive_type = Primitive::GX_DRAW_TRIANGLES;
break; break;
case PrimitiveType::TriangleStrip: case PrimitiveType::TriangleStrip:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP; primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP;
break; break;
} }
@ -57,7 +58,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
if (g_renderer->IsBBoxEnabled()) if (g_renderer->IsBBoxEnabled())
g_renderer->BBoxFlush(); g_renderer->BBoxFlush();
m_setup_unit.Init(primitiveType); m_setup_unit.Init(primitive_type);
// set all states which are stored within video sw // set all states which are stored within video sw
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
@ -74,7 +75,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex)); memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex));
// parse the videocommon format to our own struct format (m_vertex) // parse the videocommon format to our own struct format (m_vertex)
SetFormat(g_main_cp_state.last_id, primitiveType); SetFormat();
ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index); ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index);
// transform this vertex so that it can be used for rasterization (outVertex) // transform this vertex so that it can be used for rasterization (outVertex)
@ -98,7 +99,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
DebugUtil::OnObjectEnd(); DebugUtil::OnObjectEnd();
} }
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) void SWVertexLoader::SetFormat()
{ {
// matrix index from xf regs or cp memory? // matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx || if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
@ -144,7 +145,7 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
if (format.enable) if (format.enable)
{ {
src.Skip(format.offset); src.Skip(format.offset);
src.Skip(base_component * (1 << (format.type >> 1))); src.Skip(base_component * GetElementSize(format.type));
int i; int i;
for (i = 0; i < std::min(format.components - base_component, components); i++) for (i = 0; i < std::min(format.components - base_component, components); i++)
@ -152,24 +153,24 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
int i_dst = reverse ? components - i - 1 : i; int i_dst = reverse ? components - i - 1 : i;
switch (format.type) switch (format.type)
{ {
case VAR_UNSIGNED_BYTE: case ComponentFormat::UByte:
dst[i_dst] = ReadNormalized<T, u8>(src.Read<u8, swap>()); dst[i_dst] = ReadNormalized<T, u8>(src.Read<u8, swap>());
break; break;
case VAR_BYTE: case ComponentFormat::Byte:
dst[i_dst] = ReadNormalized<T, s8>(src.Read<s8, swap>()); dst[i_dst] = ReadNormalized<T, s8>(src.Read<s8, swap>());
break; break;
case VAR_UNSIGNED_SHORT: case ComponentFormat::UShort:
dst[i_dst] = ReadNormalized<T, u16>(src.Read<u16, swap>()); dst[i_dst] = ReadNormalized<T, u16>(src.Read<u16, swap>());
break; break;
case VAR_SHORT: case ComponentFormat::Short:
dst[i_dst] = ReadNormalized<T, s16>(src.Read<s16, swap>()); dst[i_dst] = ReadNormalized<T, s16>(src.Read<s16, swap>());
break; break;
case VAR_FLOAT: case ComponentFormat::Float:
dst[i_dst] = ReadNormalized<T, float>(src.Read<float, swap>()); dst[i_dst] = ReadNormalized<T, float>(src.Read<float, swap>());
break; break;
} }
ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT, ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float,
"only non-float values are allowed to be streamed as integer"); "only non-float values are allowed to be streamed as integer");
} }
for (; i < components; i++) for (; i < components; i++)

View File

@ -22,7 +22,7 @@ public:
protected: protected:
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;
void SetFormat(u8 attributeIndex, u8 primitiveType); void SetFormat();
void ParseVertex(const PortableVertexDeclaration& vdec, int index); void ParseVertex(const PortableVertexDeclaration& vdec, int index);
InputVertexData m_vertex{}; InputVertexData m_vertex{};

View File

@ -9,9 +9,9 @@
#include "VideoBackends/Software/Clipper.h" #include "VideoBackends/Software/Clipper.h"
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
void SetupUnit::Init(u8 primitiveType) void SetupUnit::Init(OpcodeDecoder::Primitive primitive_type)
{ {
m_PrimType = primitiveType; m_PrimType = primitive_type;
m_VertexCounter = 0; m_VertexCounter = 0;
m_VertPointer[0] = &m_Vertices[0]; m_VertPointer[0] = &m_Vertices[0];
@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex()
void SetupUnit::SetupVertex() void SetupUnit::SetupVertex()
{ {
using OpcodeDecoder::Primitive;
switch (m_PrimType) switch (m_PrimType)
{ {
case OpcodeDecoder::GX_DRAW_QUADS: case Primitive::GX_DRAW_QUADS:
SetupQuad(); SetupQuad();
break; break;
case OpcodeDecoder::GX_DRAW_QUADS_2: case Primitive::GX_DRAW_QUADS_2:
WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2"); WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2");
SetupQuad(); SetupQuad();
break; break;
case OpcodeDecoder::GX_DRAW_TRIANGLES: case Primitive::GX_DRAW_TRIANGLES:
SetupTriangle(); SetupTriangle();
break; break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: case Primitive::GX_DRAW_TRIANGLE_STRIP:
SetupTriStrip(); SetupTriStrip();
break; break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: case Primitive::GX_DRAW_TRIANGLE_FAN:
SetupTriFan(); SetupTriFan();
break; break;
case OpcodeDecoder::GX_DRAW_LINES: case Primitive::GX_DRAW_LINES:
SetupLine(); SetupLine();
break; break;
case OpcodeDecoder::GX_DRAW_LINE_STRIP: case Primitive::GX_DRAW_LINE_STRIP:
SetupLineStrip(); SetupLineStrip();
break; break;
case OpcodeDecoder::GX_DRAW_POINTS: case Primitive::GX_DRAW_POINTS:
SetupPoint(); SetupPoint();
break; break;
} }

View File

@ -6,9 +6,14 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoBackends/Software/NativeVertexFormat.h" #include "VideoBackends/Software/NativeVertexFormat.h"
namespace OpcodeDecoder
{
enum class Primitive : u8;
}
class SetupUnit class SetupUnit
{ {
u8 m_PrimType = 0; OpcodeDecoder::Primitive m_PrimType{};
int m_VertexCounter = 0; int m_VertexCounter = 0;
OutputVertexData m_Vertices[3]; OutputVertexData m_Vertices[3];
@ -24,7 +29,7 @@ class SetupUnit
void SetupPoint(); void SetupPoint();
public: public:
void Init(u8 primitiveType); void Init(OpcodeDecoder::Primitive primitive_type);
OutputVertexData* GetVertex(); OutputVertexData* GetVertex();

View File

@ -4,6 +4,7 @@
#include "VideoBackends/Vulkan/VKVertexFormat.h" #include "VideoBackends/Vulkan/VKVertexFormat.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/EnumMap.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ObjectCache.h" #include "VideoBackends/Vulkan/ObjectCache.h"
@ -13,32 +14,35 @@
namespace Vulkan namespace Vulkan
{ {
static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool integer)
{ {
static const VkFormat float_type_lookup[][4] = { using ComponentArray = std::array<VkFormat, 4>;
{VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper
VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
{VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, VK_FORMAT_R8G8B8A8_UNORM}), // UByte
VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
{VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, VK_FORMAT_R8G8B8A8_SNORM}), // Byte
VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R16G16B16A16_UNORM}), // UShort
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
}; };
static const VkFormat integer_type_lookup[][4] = { static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
{VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE VK_FORMAT_R8G8B8A8_UINT}), // UByte
{VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE VK_FORMAT_R8G8B8A8_SINT}), // Byte
{VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT VK_FORMAT_R16G16B16A16_UINT}), // UShort
{VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT VK_FORMAT_R16G16B16A16_SINT}), // Short
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
}; };
ASSERT(components > 0 && components <= 4); ASSERT(components > 0 && components <= 4);

View File

@ -258,7 +258,7 @@ enum class TevBias : u32
{ {
Zero = 0, Zero = 0,
AddHalf = 1, AddHalf = 1,
Subhalf = 2, SubHalf = 2,
Compare = 3 Compare = 3
}; };
template <> template <>
@ -491,6 +491,94 @@ struct fmt::formatter<TevStageCombiner::ColorCombiner>
template <typename FormatContext> template <typename FormatContext>
auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx) auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx)
{ {
auto out = ctx.out();
if (cc.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.
static constexpr Common::EnumMap<const char*, TevColorArg::Zero> alt_names = {
"prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa",
"tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0",
};
const bool has_d = cc.d != TevColorArg::Zero;
// If c is one, (1 - c) is zero, so (1-c)*a is zero
const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One;
// If either b or c is zero, b*c is zero
const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero;
const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = cc.scale != TevScale::Scale1;
const char op = (cc.op == TevOp::Sub ? '-' : '+');
if (cc.dest == TevOutput::Prev)
out = format_to(out, "dest.rgb = ");
else
out = format_to(out, "{:n}.rgb = ", cc.dest);
if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{}", alt_names[cc.d]);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (cc.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
if (cc.c == TevColorArg::Half)
{
// has_ac and has_bc imply that c is not Zero or One, and Half is the only remaining
// numeric constant. This results in an average.
out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]);
}
else
{
out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b],
alt_names[cc.c]);
}
}
else if (has_ac)
{
if (cc.c == TevColorArg::Zero)
out = format_to(out, "{}", alt_names[cc.a]);
else if (cc.c == TevColorArg::Half) // 1 - .5 is .5
out = format_to(out, ".5*{}", alt_names[cc.a]);
else
out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]);
}
else // has_bc
{
if (cc.c == TevColorArg::One)
out = format_to(out, "{}", alt_names[cc.b]);
else
out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, cc.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", cc.scale);
out = format_to(out, "\n\n");
}
return format_to(ctx.out(), return format_to(ctx.out(),
"a: {}\n" "a: {}\n"
"b: {}\n" "b: {}\n"
@ -512,7 +600,80 @@ struct fmt::formatter<TevStageCombiner::AlphaCombiner>
template <typename FormatContext> template <typename FormatContext>
auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx) auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx)
{ {
return format_to(ctx.out(), auto out = ctx.out();
if (ac.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.
// We don't need an alt_names map here, unlike the color combiner, as the only special term is
// Zero, and we filter that out below. However, we do need to append ".a" to all
// parameters, to make it explicit that these are operations on the alpha term instead of the
// 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown.
const bool has_d = ac.d != TevAlphaArg::Zero;
// There is no c value for alpha that results in (1 - c) always being zero
const bool has_ac = ac.a != TevAlphaArg::Zero;
// If either b or c is zero, b*c is zero
const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero;
const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = ac.scale != TevScale::Scale1;
const char op = (ac.op == TevOp::Sub ? '-' : '+');
if (ac.dest == TevOutput::Prev)
out = format_to(out, "dest.a = ");
else
out = format_to(out, "{:n}.a = ", ac.dest);
if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{:n}.a", ac.d);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (ac.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c);
}
else if (has_ac)
{
if (ac.c == TevAlphaArg::Zero)
out = format_to(out, "{:n}.a", ac.a);
else
out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a);
}
else // has_bc
{
out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, ac.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", ac.scale);
out = format_to(out, "\n\n");
}
return format_to(out,
"a: {}\n" "a: {}\n"
"b: {}\n" "b: {}\n"
"c: {}\n" "c: {}\n"
@ -756,14 +917,14 @@ struct fmt::formatter<LODType> : EnumFormatter<LODType::Diagonal>
formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {} formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {}
}; };
enum class MaxAnsio enum class MaxAniso
{ {
One = 0, One = 0,
Two = 1, Two = 1,
Four = 2, Four = 2,
}; };
template <> template <>
struct fmt::formatter<MaxAnsio> : EnumFormatter<MaxAnsio::Four> struct fmt::formatter<MaxAniso> : EnumFormatter<MaxAniso::Four>
{ {
formatter() : EnumFormatter({"1", "2", "4"}) {} formatter() : EnumFormatter({"1", "2", "4"}) {}
}; };
@ -777,7 +938,7 @@ union TexMode0
BitField<7, 1, FilterMode> min_filter; BitField<7, 1, FilterMode> min_filter;
BitField<8, 1, LODType> diag_lod; BitField<8, 1, LODType> diag_lod;
BitField<9, 8, s32> lod_bias; BitField<9, 8, s32> lod_bias;
BitField<19, 2, MaxAnsio> max_aniso; BitField<19, 2, MaxAniso> max_aniso;
BitField<21, 1, bool, u32> lod_clamp; BitField<21, 1, bool, u32> lod_clamp;
u32 hex; u32 hex;
}; };
@ -2205,7 +2366,7 @@ struct BPMemory
extern BPMemory bpmem; extern BPMemory bpmem;
void LoadBPReg(u32 value0, int cycles_into_future); void LoadBPReg(u8 reg, u32 value, int cycles_into_future);
void LoadBPRegPreprocess(u32 value0, int cycles_into_future); void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future);
std::pair<std::string, std::string> GetBPRegInfo(u8 cmd, u32 cmddata); std::pair<std::string, std::string> GetBPRegInfo(u8 cmd, u32 cmddata);

View File

@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bp.newvalue); bp.newvalue);
} }
// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg() // Call browser: OpcodeDecoding.cpp RunCallback::OnBP()
void LoadBPReg(u32 value0, int cycles_into_future) void LoadBPReg(u8 reg, u32 value, int cycles_into_future)
{ {
int regNum = value0 >> 24; int oldval = ((u32*)&bpmem)[reg];
int oldval = ((u32*)&bpmem)[regNum]; int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask);
int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask);
int changes = (oldval ^ newval) & 0xFFFFFF; int changes = (oldval ^ newval) & 0xFFFFFF;
BPCmd bp = {regNum, changes, newval}; BPCmd bp = {reg, changes, newval};
// Reset the mask register if we're not trying to set it ourselves. // Reset the mask register if we're not trying to set it ourselves.
if (regNum != BPMEM_BP_MASK) if (reg != BPMEM_BP_MASK)
bpmem.bpMask = 0xFFFFFF; bpmem.bpMask = 0xFFFFFF;
BPWritten(bp, cycles_into_future); BPWritten(bp, cycles_into_future);
} }
void LoadBPRegPreprocess(u32 value0, int cycles_into_future) void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future)
{ {
int regNum = value0 >> 24; // masking via BPMEM_BP_MASK could hypothetically be a problem
// masking could hypothetically be a problem u32 newval = value & 0xffffff;
u32 newval = value0 & 0xffffff; switch (reg)
switch (regNum)
{ {
case BPMEM_SETDRAWDONE: case BPMEM_SETDRAWDONE:
if ((newval & 0xff) == 0x02) if ((newval & 0xff) == 0x02)

View File

@ -2,7 +2,14 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include <cstring>
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/VertexLoaderManager.h"
// CP state // CP state
CPState g_main_cp_state; CPState g_main_cp_state;
@ -22,13 +29,13 @@ void DoCPState(PointerWrap& p)
if (p.mode == PointerWrap::MODE_READ) if (p.mode == PointerWrap::MODE_READ)
{ {
CopyPreprocessCPStateFromMain(); CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true; VertexLoaderManager::g_bases_dirty = true;
} }
} }
void CopyPreprocessCPStateFromMain() void CopyPreprocessCPStateFromMain()
{ {
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState)); std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
} }
std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value) std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
@ -62,12 +69,167 @@ std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK), return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK),
fmt::to_string(UVAT_group2{.Hex = value})); fmt::to_string(UVAT_group2{.Hex = value}));
case ARRAY_BASE: case ARRAY_BASE:
return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK), return std::make_pair(
fmt::format("Base address {:08x}", value)); fmt::format("ARRAY_BASE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Base address {:08x}", value));
case ARRAY_STRIDE: case ARRAY_STRIDE:
return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE), return std::make_pair(
fmt::format("Stride {:02x}", value & 0xff)); fmt::format("ARRAY_STRIDE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Stride {:02x}", value & 0xff));
default: default:
return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), ""); return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), "");
} }
} }
// Builds a CPState from a raw array of CP register values, where each register is stored at the
// index given by its register address (MATINDEX_A, VCD_LO, CP_VAT_REG_A + n, ...).
// The state is default-initialized first, then every register known to CPState is read from
// memory.
CPState::CPState(const u32* memory) : CPState()
{
  // Registers that exist exactly once.
  matrix_index_a.Hex = memory[MATINDEX_A];
  matrix_index_b.Hex = memory[MATINDEX_B];
  vtx_desc.low.Hex = memory[VCD_LO];
  vtx_desc.high.Hex = memory[VCD_HI];

  // Each vertex attribute table is split over three register groups (A, B and C).
  for (u32 vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    VAT& attr = vtx_attr[vat];
    attr.g0.Hex = memory[CP_VAT_REG_A + vat];
    attr.g1.Hex = memory[CP_VAT_REG_B + vat];
    attr.g2.Hex = memory[CP_VAT_REG_C + vat];
  }

  // Base address and stride for every array slot.
  for (u32 slot = 0; slot < CP_NUM_ARRAYS; ++slot)
  {
    const auto array = static_cast<CPArray>(slot);
    array_bases[array] = memory[ARRAY_BASE + slot];
    array_strides[array] = memory[ARRAY_STRIDE + slot];
  }
}
// Applies a single CP register write to this state.
//   sub_cmd: the register address byte. It is masked with CP_COMMAND_MASK to pick the register
//            group; the unmasked value is used to detect unexpected addresses and to select the
//            VAT or array slot within a group.
//   value:   the 32-bit value being written.
// Unexpected or unknown addresses are reported as game quirks and logged. For the groups that are
// recognized, the register is still written after the warning.
void CPState::LoadCPReg(u8 sub_cmd, u32 value)
{
  // Shared by every "recognized group, unexpected exact address" path below.
  const auto report_maybe_invalid = [] {
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
  };

  switch (sub_cmd & CP_COMMAND_MASK)
  {
  case UNKNOWN_00:
  case UNKNOWN_10:
  case UNKNOWN_20:
  {
    // All titles using libogc or the official SDK issue 0x20 with value=0 on startup, so that
    // particular write is not reported.
    const bool is_startup_write = (sub_cmd == UNKNOWN_20) && (value == 0);
    if (!is_startup_write)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
      DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
                    sub_cmd);
    }
    break;
  }

  case MATINDEX_A:
  {
    if (sub_cmd != MATINDEX_A)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_A: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_A, sub_cmd);
    }
    matrix_index_a.Hex = value;
    break;
  }

  case MATINDEX_B:
  {
    if (sub_cmd != MATINDEX_B)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_B: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_B, sub_cmd);
    }
    matrix_index_b.Hex = value;
    break;
  }

  case VCD_LO:
  {
    if (sub_cmd != VCD_LO)  // Stricter than YAGCD
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_LO: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_LO, sub_cmd);
    }
    vtx_desc.low.Hex = value;
    break;
  }

  case VCD_HI:
  {
    if (sub_cmd != VCD_HI)  // Stricter than YAGCD
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_HI: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_HI, sub_cmd);
    }
    vtx_desc.high.Hex = value;
    break;
  }

  case CP_VAT_REG_A:
  {
    const auto vat_offset = sub_cmd - CP_VAT_REG_A;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", vat_offset);
    }
    // The write goes through either way, to the slot selected by the masked address.
    vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
    break;
  }

  case CP_VAT_REG_B:
  {
    const auto vat_offset = sub_cmd - CP_VAT_REG_B;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", vat_offset);
    }
    vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
    break;
  }

  case CP_VAT_REG_C:
  {
    const auto vat_offset = sub_cmd - CP_VAT_REG_C;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", vat_offset);
    }
    vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
    break;
  }

  // Pointers to vertex arrays in GC RAM
  case ARRAY_BASE:
  {
    const auto array = static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK);
    array_bases[array] = value & CommandProcessor::GetPhysicalAddressMask();
    break;
  }

  case ARRAY_STRIDE:
  {
    const auto array = static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK);
    // Only the low byte of the written value is kept as the stride.
    array_strides[array] = value & 0xFF;
    break;
  }

  default:
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
    WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
    break;
  }
}
// Writes this state's registers into a raw CP register array, storing each register at the index
// given by its register address. This is the inverse of the CPState(const u32*) constructor.
// Only the entries for registers tracked by CPState are written; other entries of memory are left
// untouched.
void CPState::FillCPMemoryArray(u32* memory) const
{
  // Registers that exist exactly once.
  memory[MATINDEX_A] = matrix_index_a.Hex;
  memory[MATINDEX_B] = matrix_index_b.Hex;
  memory[VCD_LO] = vtx_desc.low.Hex;
  memory[VCD_HI] = vtx_desc.high.Hex;

  // Each vertex attribute table is split over three register groups (A, B and C).
  for (int vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    const VAT& attr = vtx_attr[vat];
    memory[CP_VAT_REG_A + vat] = attr.g0.Hex;
    memory[CP_VAT_REG_B + vat] = attr.g1.Hex;
    memory[CP_VAT_REG_C + vat] = attr.g2.Hex;
  }

  // Base address and stride for every array slot.
  for (int slot = 0; slot < CP_NUM_ARRAYS; ++slot)
  {
    const auto array = static_cast<CPArray>(slot);
    memory[ARRAY_BASE + slot] = array_bases[array];
    memory[ARRAY_STRIDE + slot] = array_strides[array];
  }
}

View File

@ -5,12 +5,14 @@
#include <array> #include <array>
#include <string> #include <string>
#include <type_traits>
#include <utility> #include <utility>
#include "Common/BitField.h" #include "Common/BitField.h"
#include "Common/BitSet.h" #include "Common/BitSet.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h" #include "Common/EnumFormatter.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
enum enum
@ -53,24 +55,46 @@ enum
}; };
// Vertex array numbers // Vertex array numbers
enum enum class CPArray : u8
{ {
ARRAY_POSITION = 0, Position = 0,
ARRAY_NORMAL = 1, Normal = 1,
ARRAY_COLOR0 = 2,
NUM_COLOR_ARRAYS = 2,
ARRAY_TEXCOORD0 = 4,
NUM_TEXCOORD_ARRAYS = 8,
ARRAY_XF_A = 12, // Usually used for position matrices Color0 = 2,
ARRAY_XF_B = 13, // Usually used for normal matrices Color1 = 3,
ARRAY_XF_C = 14, // Usually used for tex coord matrices
ARRAY_XF_D = 15, // Usually used for light objects
// Number of arrays related to vertex components (position, normal, color, tex coord) TexCoord0 = 4,
// Excludes the 4 arrays used for indexed XF loads TexCoord1 = 5,
NUM_VERTEX_COMPONENT_ARRAYS = 12, TexCoord2 = 6,
TexCoord3 = 7,
TexCoord4 = 8,
TexCoord5 = 9,
TexCoord6 = 10,
TexCoord7 = 11,
XF_A = 12, // Usually used for position matrices
XF_B = 13, // Usually used for normal matrices
XF_C = 14, // Usually used for tex coord matrices
XF_D = 15, // Usually used for light objects
}; };
// fmt formatter for CPArray, so that values print with a readable name instead of a bare number.
// EnumFormatter looks names up by the enum's numeric value, so the entries below must stay in
// enumerator order: index 0 is Position and index 15 is XF_D (the upper bound given to
// EnumFormatter).
template <>
struct fmt::formatter<CPArray> : EnumFormatter<CPArray::XF_D>
{
// One display name per CPArray value, in numeric order.
static constexpr array_type names = {"Position", "Normal", "Color 0", "Color 1",
"Tex Coord 0", "Tex Coord 1", "Tex Coord 2", "Tex Coord 3",
"Tex Coord 4", "Tex Coord 5", "Tex Coord 6", "Tex Coord 7",
"XF A", "XF B", "XF C", "XF D"};
formatter() : EnumFormatter(names) {}
};
// Returns the CPArray that lies `offset` slots after `array`.
// Intended for offsetting from Color0/TexCoord0 (e.g. Color0 + 1 is Color1).
// No range checking is performed on the result.
constexpr CPArray operator+(CPArray array, u8 offset)
{
  const auto shifted = static_cast<u8>(array) + offset;
  return static_cast<CPArray>(shifted);
}
// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12;
// Vertex components // Vertex components
enum class VertexComponentFormat enum class VertexComponentFormat
@ -607,32 +631,29 @@ class VertexLoaderBase;
// STATE_TO_SAVE // STATE_TO_SAVE
struct CPState final struct CPState final
{ {
u32 array_bases[CP_NUM_ARRAYS]{}; CPState() = default;
u32 array_strides[CP_NUM_ARRAYS]{}; explicit CPState(const u32* memory);
// Mutates the CP state based on the given command and value.
void LoadCPReg(u8 sub_cmd, u32 value);
// Fills memory with data from CP regs. There should be space for 0x100 values in memory.
void FillCPMemoryArray(u32* memory) const;
Common::EnumMap<u32, CPArray::XF_D> array_bases;
Common::EnumMap<u32, CPArray::XF_D> array_strides;
TMatrixIndexA matrix_index_a{}; TMatrixIndexA matrix_index_a{};
TMatrixIndexB matrix_index_b{}; TMatrixIndexB matrix_index_b{};
TVtxDesc vtx_desc; TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed. // Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[CP_NUM_VAT_REG]{}; std::array<VAT, CP_NUM_VAT_REG> vtx_attr{};
// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty{};
bool bases_dirty = false;
VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{};
int last_id = 0;
}; };
static_assert(std::is_trivially_copyable_v<CPState>);
class PointerWrap; class PointerWrap;
extern CPState g_main_cp_state; extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state; extern CPState g_preprocess_cp_state;
// Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
// Fills memory with data from CP regs
void FillCPMemoryArray(u32* memory);
void DoCPState(PointerWrap& p); void DoCPState(PointerWrap& p);
void CopyPreprocessCPStateFromMain(); void CopyPreprocessCPStateFromMain();

View File

@ -5,6 +5,7 @@
#include <atomic> #include <atomic>
#include <cstring> #include <cstring>
#include <fmt/format.h>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
@ -607,10 +608,10 @@ void SetCpClearRegister()
{ {
} }
void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess)
{ {
// TODO(Omega): Maybe dump FIFO to file on this error // TODO(Omega): Maybe dump FIFO to file on this error
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n" PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n"
"This means one of the following:\n" "This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n" "* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n" "* Command stream corrupted by some spurious memory bug\n"
@ -618,7 +619,7 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
"* Some other sort of bug\n\n" "* Some other sort of bug\n\n"
"Further errors will be sent to the Video Backend log and\n" "Further errors will be sent to the Video Backend log and\n"
"Dolphin will now likely crash or hang. Enjoy.", "Dolphin will now likely crash or hang. Enjoy.",
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false"); cmd_byte, fmt::ptr(buffer), preprocess);
{ {
PanicAlertFmt("Illegal command {:02x}\n" PanicAlertFmt("Illegal command {:02x}\n"

View File

@ -169,7 +169,7 @@ void SetCpClearRegister();
void SetCpControlRegister(); void SetCpControlRegister();
void SetCpStatusRegister(); void SetCpStatusRegister();
void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess); void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);
u32 GetPhysicalAddressMask(); u32 GetPhysicalAddressMask();

View File

@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
} }
} }
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>( s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr);
// This would have to be locked if the GPU thread didn't spin. // This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len; s_video_buffer_write_ptr = write_ptr + len;
} }
@ -316,7 +316,7 @@ void RunGpuLoop()
if (write_ptr > seen_ptr) if (write_ptr > seen_ptr)
{ {
s_video_buffer_read_ptr = s_video_buffer_read_ptr =
OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr);
s_video_buffer_seen_ptr = write_ptr; s_video_buffer_seen_ptr = write_ptr;
} }
} }
@ -349,8 +349,8 @@ void RunGpuLoop()
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32); fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);
u8* write_ptr = s_video_buffer_write_ptr; u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run( s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted);
fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst); fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks)
} }
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0; u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run( s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles);
available_ticks -= cycles; available_ticks -= cycles;
} }

View File

@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines()
{ {
PortableVertexDeclaration vtx_decl = {}; PortableVertexDeclaration vtx_decl = {};
vtx_decl.position.enable = true; vtx_decl.position.enable = true;
vtx_decl.position.type = VAR_FLOAT; vtx_decl.position.type = ComponentFormat::Float;
vtx_decl.position.components = 4; vtx_decl.position.components = 4;
vtx_decl.position.integer = false; vtx_decl.position.integer = false;
vtx_decl.position.offset = offsetof(EFBPokeVertex, position); vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
vtx_decl.colors[0].enable = true; vtx_decl.colors[0].enable = true;
vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; vtx_decl.colors[0].type = ComponentFormat::UByte;
vtx_decl.colors[0].components = 4; vtx_decl.colors[0].components = 4;
vtx_decl.colors[0].integer = false; vtx_decl.colors[0].integer = false;
vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);

View File

@ -6,25 +6,29 @@
#include <cmath> #include <cmath>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DriverDetails.h" #include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h" #include "VideoCommon/XFMemory.h"
constexpr std::array<const char*, 4> primitives_ogl{ constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_ogl{
"points", "points",
"lines", "lines",
"triangles", "triangles",
"triangles", "triangles",
}; };
constexpr std::array<const char*, 4> primitives_d3d{ constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_d3d{
"point", "point",
"line", "line",
"triangle", "triangle",
"triangle", "triangle",
}; };
// Per-primitive vertex counts used by GenerateGeometryShaderCode, indexed by PrimitiveType.
// Entries are in the same order as primitives_ogl/primitives_d3d; the last one is TriangleStrip.
// vertex_in_map: number of input vertices per primitive, used as the size of the geometry
// shader's VS_OUTPUT input array.
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_in_map{1u, 2u, 3u, 3u};
// vertex_out_map: base number of output vertices per primitive. The generator adds one when
// wireframe is enabled, and doubles the total for stereo when GS instancing is unavailable.
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_out_map{4u, 4u, 4u, 3u};
bool geometry_shader_uid_data::IsPassthrough() const bool geometry_shader_uid_data::IsPassthrough() const
{ {
const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off; const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off;
@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
const bool ssaa = host_config.ssaa; const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo; const bool stereo = host_config.stereo;
const auto primitive_type = static_cast<PrimitiveType>(uid_data->primitive_type); const auto primitive_type = static_cast<PrimitiveType>(uid_data->primitive_type);
const auto primitive_type_index = static_cast<unsigned>(uid_data->primitive_type); const u32 vertex_in = vertex_in_map[primitive_type];
const auto vertex_in = std::min(static_cast<unsigned>(primitive_type_index) + 1, 3u); u32 vertex_out = vertex_out_map[primitive_type];
u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 3 : 4;
if (wireframe) if (wireframe)
vertex_out++; vertex_out++;
@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
// Insert layout parameters // Insert layout parameters
if (host_config.backend_gs_instancing) if (host_config.backend_gs_instancing)
{ {
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index], out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type],
stereo ? 2 : 1); stereo ? 2 : 1);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out); vertex_out);
} }
else else
{ {
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]); out.Write("layout({}) in;\n", primitives_ogl[primitive_type]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out); stereo ? vertex_out * 2 : vertex_out);
} }
@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1); out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint " out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n", "InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
} }
else else
{ {
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out); out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n", out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
} }
out.Write("\tVertexData ps;\n"); out.Write("\tVertexData ps;\n");

View File

@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)
void IndexGenerator::Init() void IndexGenerator::Init()
{ {
using OpcodeDecoder::Primitive;
if (g_Config.backend_info.bSupportsPrimitiveRestart) if (g_Config.backend_info.bSupportsPrimitiveRestart)
{ {
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>; m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>; m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>; m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
} }
else else
{ {
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>; m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>; m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>; m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
} }
m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList; m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip; m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints; m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
} }
void IndexGenerator::Start(u16* index_ptr) void IndexGenerator::Start(u16* index_ptr)
@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr)
m_base_index = 0; m_base_index = 0;
} }
void IndexGenerator::AddIndices(int primitive, u32 num_vertices) void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{ {
m_index_buffer_current = m_index_buffer_current =
m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index); m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index);

View File

@ -6,8 +6,9 @@
#pragma once #pragma once
#include <array>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/OpcodeDecoding.h"
class IndexGenerator class IndexGenerator
{ {
@ -15,7 +16,7 @@ public:
void Init(); void Init();
void Start(u16* index_ptr); void Start(u16* index_ptr);
void AddIndices(int primitive, u32 num_vertices); void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices); void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices);
@ -30,5 +31,5 @@ private:
u32 m_base_index = 0; u32 m_base_index = 0;
using PrimitiveFunction = u16* (*)(u16*, u32, u32); using PrimitiveFunction = u16* (*)(u16*, u32, u32);
std::array<PrimitiveFunction, 8> m_primitive_table{}; Common::EnumMap<PrimitiveFunction, OpcodeDecoder::Primitive::GX_DRAW_POINTS> m_primitive_table{};
}; };

View File

@ -8,6 +8,7 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Hash.h" #include "Common/Hash.h"
#include "VideoCommon/CPMemory.h"
// m_components // m_components
enum enum
@ -45,18 +46,9 @@ enum
VB_HAS_UVTEXMTXSHIFT = 13, VB_HAS_UVTEXMTXSHIFT = 13,
}; };
enum VarType
{
VAR_UNSIGNED_BYTE, // GX_U8 = 0
VAR_BYTE, // GX_S8 = 1
VAR_UNSIGNED_SHORT, // GX_U16 = 2
VAR_SHORT, // GX_S16 = 3
VAR_FLOAT, // GX_F32 = 4
};
struct AttributeFormat struct AttributeFormat
{ {
VarType type; ComponentFormat type;
int components; int components;
int offset; int offset;
bool enable; bool enable;

View File

@ -14,7 +14,7 @@
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
#include "Common/CommonTypes.h" #include "Common/Assert.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Core/FifoPlayer/FifoRecorder.h" #include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
@ -24,55 +24,15 @@
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h" #include "VideoCommon/Fifo.h"
#include "VideoCommon/Statistics.h" #include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/XFMemory.h" #include "VideoCommon/XFMemory.h"
#include "VideoCommon/XFStructs.h"
namespace OpcodeDecoder namespace OpcodeDecoder
{ {
namespace static bool s_is_fifo_error_seen = false;
{
bool s_is_fifo_error_seen = false;
u32 InterpretDisplayList(u32 address, u32 size)
{
u8* start_address;
if (Fifo::UseDeterministicGPUThread())
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
else
start_address = Memory::GetPointer(address);
u32 cycles = 0;
// Avoid the crash if Memory::GetPointer failed ..
if (start_address != nullptr)
{
// temporarily swap dl and non-dl (small "hack" for the stats)
g_stats.SwapDL();
Run(DataReader(start_address, start_address + size), &cycles, true);
INCSTAT(g_stats.this_frame.num_dlists_called);
// un-swap
g_stats.SwapDL();
}
return cycles;
}
void InterpretDisplayListPreprocess(u32 address, u32 size)
{
u8* const start_address = Memory::GetPointer(address);
Fifo::PushFifoAuxBuffer(start_address, size);
if (start_address == nullptr)
return;
Run<true>(DataReader(start_address, start_address + size), nullptr, true);
}
} // Anonymous namespace
bool g_record_fifo_data = false; bool g_record_fifo_data = false;
void Init() void Init()
@ -81,202 +41,228 @@ void Init()
} }
template <bool is_preprocess> template <bool is_preprocess>
u8* Run(DataReader src, u32* cycles, bool in_display_list) class RunCallback final : public Callback
{ {
u32 total_cycles = 0; public:
u8* opcode_start = nullptr; OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
const auto finish_up = [cycles, &opcode_start, &total_cycles] {
if (cycles != nullptr)
{
*cycles = total_cycles;
}
return opcode_start;
};
while (true)
{ {
opcode_start = src.GetPointer(); m_cycles += 18 + 6 * count;
if (!src.size())
return finish_up();
const u8 cmd_byte = src.Read<u8>();
switch (cmd_byte)
{
case GX_NOP:
total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
break;
case GX_UNKNOWN_RESET:
total_cycles += 6; // Datel software uses this command
DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte);
break;
case GX_LOAD_CP_REG:
{
if (src.size() < 1 + 4)
return finish_up();
total_cycles += 12;
const u8 sub_cmd = src.Read<u8>();
const u32 value = src.Read<u32>();
LoadCPReg(sub_cmd, value, is_preprocess);
if constexpr (!is_preprocess)
INCSTAT(g_stats.this_frame.num_cp_loads);
}
break;
case GX_LOAD_XF_REG:
{
if (src.size() < 4)
return finish_up();
const u32 cmd2 = src.Read<u32>();
const u32 transfer_size = ((cmd2 >> 16) & 15) + 1;
if (src.size() < transfer_size * sizeof(u32))
return finish_up();
total_cycles += 18 + 6 * transfer_size;
if constexpr (!is_preprocess)
{
const u32 xf_address = cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address, src);
INCSTAT(g_stats.this_frame.num_xf_loads);
}
src.Skip<u32>(transfer_size);
}
break;
case GX_LOAD_INDX_A: // Used for position matrices
case GX_LOAD_INDX_B: // Used for normal matrices
case GX_LOAD_INDX_C: // Used for postmatrices
case GX_LOAD_INDX_D: // Used for lights
{
if (src.size() < 4)
return finish_up();
total_cycles += 6;
// Map the command byte to its ref array.
// GX_LOAD_INDX_A (32) -> 0xC
// GX_LOAD_INDX_B (40) -> 0xD
// GX_LOAD_INDX_C (48) -> 0xE
// GX_LOAD_INDX_D (56) -> 0xF
const int ref_array = (cmd_byte / 8) + 8;
if constexpr (is_preprocess)
PreprocessIndexedXF(src.Read<u32>(), ref_array);
else
LoadIndexedXF(src.Read<u32>(), ref_array);
}
break;
case GX_CMD_CALL_DL:
{
if (src.size() < 8)
return finish_up();
const u32 address = src.Read<u32>();
const u32 count = src.Read<u32>();
if (in_display_list)
{
total_cycles += 6;
INFO_LOG_FMT(VIDEO, "recursive display list detected");
}
else
{
if constexpr (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
total_cycles += 6 + InterpretDisplayList(address, count);
}
}
break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after
// that
total_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte);
break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
total_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
break;
case GX_LOAD_BP_REG:
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{
if (src.size() < 4)
return finish_up();
total_cycles += 12;
const u32 bp_cmd = src.Read<u32>();
if constexpr (is_preprocess)
{
LoadBPRegPreprocess(bp_cmd, total_cycles);
}
else
{
LoadBPReg(bp_cmd, total_cycles);
INCSTAT(g_stats.this_frame.num_bp_loads);
}
}
break;
// draw primitives
default:
if ((cmd_byte & 0xC0) == 0x80)
{
// load vertices
if (src.size() < 2)
return finish_up();
const u16 num_vertices = src.Read<u16>();
const int bytes = VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src, is_preprocess);
if (bytes < 0)
return finish_up();
src.Skip(bytes);
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
total_cycles += num_vertices * 4 * 3 + 6;
}
else
{
if (!s_is_fifo_error_seen)
CommandProcessor::HandleUnknownOpcode(cmd_byte, opcode_start, is_preprocess);
ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", cmd_byte,
fmt::ptr(opcode_start), is_preprocess ? "yes" : "no");
s_is_fifo_error_seen = true;
total_cycles += 1;
}
break;
}
// Display lists get added directly into the FIFO stream
if constexpr (!is_preprocess) if constexpr (!is_preprocess)
{ {
if (g_record_fifo_data && cmd_byte != GX_CMD_CALL_DL) LoadXFReg(address, count, data);
INCSTAT(g_stats.this_frame.num_xf_loads);
}
}
// Handles a CP register write: charges 12 cycles, marks whatever vertex-loader state the write
// invalidates as dirty, and finally stores the value through GetCPState().LoadCPReg().
// The main pass and the preprocess pass keep separate dirty flags.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
m_cycles += 12;
// Register group selector: the bits of the command byte covered by CP_COMMAND_MASK.
const u8 sub_command = command & CP_COMMAND_MASK;
if constexpr (!is_preprocess)
{
if (sub_command == MATINDEX_A)
VertexShaderManager::SetTexMatrixChangedA(value);
else if (sub_command == MATINDEX_B)
VertexShaderManager::SetTexMatrixChangedB(value);
else if (sub_command == VCD_LO || sub_command == VCD_HI)
{ {
const u8* const opcode_end = src.GetPointer(); VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start)); VertexLoaderManager::g_bases_dirty = true;
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
// Only the VAT entry selected by the CP_VAT_MASK bits of the command byte is marked dirty.
VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true;
}
else if (sub_command == ARRAY_BASE)
{
VertexLoaderManager::g_bases_dirty = true;
}
// Counted for every CP write on the main pass, whichever branch above was taken.
INCSTAT(g_stats.this_frame.num_cp_loads);
}
else if constexpr (is_preprocess)
{
// The preprocess pass only tracks VAT dirtiness.
if (sub_command == VCD_LO || sub_command == VCD_HI)
{
VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true;
}
}
// Done last and for both passes, after the dirty flags have been updated.
GetCPState().LoadCPReg(command, value);
}
// Handles a BP register write. The write costs 12 cycles, and the updated running cycle total
// is handed to whichever BP loader matches the current pass.
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
  m_cycles += 12;

  if constexpr (!is_preprocess)
  {
    LoadBPReg(command, value, m_cycles);
    // Statistics are only gathered on the main pass.
    INCSTAT(g_stats.this_frame.num_bp_loads);
  }
  else
  {
    LoadBPRegPreprocess(command, value, m_cycles);
  }
}
// Handles an indexed XF load. Costs 6 cycles and forwards the already-decoded fields to the
// loader belonging to the current pass.
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
  m_cycles += 6;

  if constexpr (!is_preprocess)
  {
    LoadIndexedXF(array, index, address, size);
  }
  else
  {
    PreprocessIndexedXF(array, index, address, size);
  }
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
// load vertices
const u32 size = vertex_size * num_vertices;
// HACK
DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size};
const u32 bytes =
VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess);
ASSERT(bytes == size);
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
m_cycles += num_vertices * 4 * 3 + 6;
}
// This can't be inlined since it calls Run, which makes it recursive
// m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit
// to inlining Run for the display list directly.
OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size))
{
m_cycles += 6;
if (m_in_display_list)
{
WARN_LOG_FMT(VIDEO, "recursive display list detected");
}
else
{
m_in_display_list = true;
if constexpr (is_preprocess)
{
const u8* const start_address = Memory::GetPointer(address);
Fifo::PushFifoAuxBuffer(start_address, size);
if (start_address != nullptr)
{
Run(start_address, size, *this);
}
}
else
{
const u8* start_address;
if (Fifo::UseDeterministicGPUThread())
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
else
start_address = Memory::GetPointer(address);
// Avoid the crash if Memory::GetPointer failed ..
if (start_address != nullptr)
{
// temporarily swap dl and non-dl (small "hack" for the stats)
g_stats.SwapDL();
Run(start_address, size, *this);
INCSTAT(g_stats.this_frame.num_dlists_called);
// un-swap
g_stats.SwapDL();
}
}
m_in_display_list = false;
}
}
OPCODE_CALLBACK(void OnNop(u32 count))
{
m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly...
}
// Handles every opcode the decoder has no dedicated handler for. Three known opcodes are
// tolerated (6 cycles each, debug log only); anything else is reported as a FIFO error.
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
  switch (static_cast<Opcode>(opcode))
  {
  case Opcode::GX_UNKNOWN_RESET:
    // Datel software uses this command
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "GX Reset?");
    break;

  case Opcode::GX_CMD_UNKNOWN_METRICS:
    // 'Zelda Four Swords' calls it and checks the metrics registers after that
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "GX 0x44");
    break;

  case Opcode::GX_CMD_INVL_VC:
    // Invalidate Vertex Cache
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
    break;

  default:
    // HandleUnknownOpcode runs only for the first error; the log line is emitted every time.
    if (!s_is_fifo_error_seen)
      CommandProcessor::HandleUnknownOpcode(opcode, data, is_preprocess);
    ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", opcode,
                  fmt::ptr(data), is_preprocess ? "yes" : "no");
    s_is_fifo_error_seen = true;
    m_cycles += 1;
    break;
  }
}
// Receives the raw bytes of a decoded command (data[0] is the opcode, size is the full command
// length). On the main pass, forwards them to the FIFO recorder while recording is active.
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size))
{
ASSERT(size >= 1);
if constexpr (!is_preprocess)
{
// Display list calls themselves are not recorded: the list's contents are decoded through
// this same callback, so its commands get added directly into the FIFO stream.
if (g_record_fifo_data && static_cast<Opcode>(data[0]) != Opcode::GX_CMD_CALL_DL)
{
FifoRecorder::GetInstance().WriteGPCommand(data, size);
} }
} }
} }
// Returns the CP state belonging to the current pass: the main state normally, the separate
// preprocess state when is_preprocess is set.
OPCODE_CALLBACK(CPState& GetCPState())
{
  if constexpr (!is_preprocess)
    return g_main_cp_state;
  else
    return g_preprocess_cp_state;
}
u32 m_cycles = 0;
bool m_in_display_list = false;
};
// Decodes commands from src with a fresh RunCallback<is_preprocess>.
// If cycles is non-null it receives the cycle count accumulated by that callback.
// Returns src's pointer after skipping the number of bytes Run reported as consumed
// (presumably the first byte that was not decoded -- confirm against DataReader::Skip).
template <bool is_preprocess>
u8* RunFifo(DataReader src, u32* cycles)
{
using CallbackT = RunCallback<is_preprocess>;
auto callback = CallbackT{};
u32 size = Run(src.GetPointer(), static_cast<u32>(src.size()), callback);
if (cycles != nullptr)
*cycles = callback.m_cycles;
src.Skip(size);
return src.GetPointer();
} }
template u8* Run<true>(DataReader src, u32* cycles, bool in_display_list); template u8* RunFifo<true>(DataReader src, u32* cycles);
template u8* Run<false>(DataReader src, u32* cycles, bool in_display_list); template u8* RunFifo<false>(DataReader src, u32* cycles);
} // namespace OpcodeDecoder } // namespace OpcodeDecoder

View File

@ -3,8 +3,17 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include <type_traits>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/Inline.h"
#include "Common/Swap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexLoaderBase.h"
struct CPState;
class DataReader; class DataReader;
namespace OpcodeDecoder namespace OpcodeDecoder
@ -12,7 +21,7 @@ namespace OpcodeDecoder
// Global flag to signal if FifoRecorder is active. // Global flag to signal if FifoRecorder is active.
extern bool g_record_fifo_data; extern bool g_record_fifo_data;
enum enum class Opcode
{ {
GX_NOP = 0x00, GX_NOP = 0x00,
GX_UNKNOWN_RESET = 0x01, GX_UNKNOWN_RESET = 0x01,
@ -27,20 +36,20 @@ enum
GX_CMD_CALL_DL = 0x40, GX_CMD_CALL_DL = 0x40,
GX_CMD_UNKNOWN_METRICS = 0x44, GX_CMD_UNKNOWN_METRICS = 0x44,
GX_CMD_INVL_VC = 0x48 GX_CMD_INVL_VC = 0x48,
GX_PRIMITIVE_START = 0x80,
GX_PRIMITIVE_END = 0xbf,
}; };
enum constexpr u8 GX_PRIMITIVE_MASK = 0x78;
{ constexpr u32 GX_PRIMITIVE_SHIFT = 3;
GX_PRIMITIVE_MASK = 0x78, constexpr u8 GX_VAT_MASK = 0x07;
GX_PRIMITIVE_SHIFT = 3,
GX_VAT_MASK = 0x07
};
// These values are the values extracted using GX_PRIMITIVE_MASK // These values are the values extracted using GX_PRIMITIVE_MASK
// and GX_PRIMITIVE_SHIFT. // and GX_PRIMITIVE_SHIFT.
// GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS. // GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS.
enum enum class Primitive : u8
{ {
GX_DRAW_QUADS = 0x0, // 0x80 GX_DRAW_QUADS = 0x0, // 0x80
GX_DRAW_QUADS_2 = 0x1, // 0x88 GX_DRAW_QUADS_2 = 0x1, // 0x88
@ -54,7 +63,232 @@ enum
void Init(); void Init();
// Interface for the Run and RunCommand functions below.
// The functions themselves are templates so that the compiler generates separate versions for each
// callback (with the callback functions inlined), so the callback doesn't actually need to be
// publicly inherited.
// Compilers don't generate warnings for failed inlining with virtual functions, so this define
// allows disabling the use of virtual functions to generate those warnings. However, this means
// that missing functions will generate errors on their use in RunCommand, instead of in the
// subclass, which can be confusing.
#define OPCODE_CALLBACK_USE_INHERITANCE
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override
#define OPCODE_CALLBACK_NOINLINE(sig) sig override
#else
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig
#define OPCODE_CALLBACK_NOINLINE(sig) sig
#endif
// Receiver interface for decoded commands. When OPCODE_CALLBACK_USE_INHERITANCE is defined the
// handlers below are pure virtual; otherwise the class is empty.
class Callback
{
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
public:
virtual ~Callback() = default;
// Called on any XF command.
virtual void OnXF(u16 address, u8 count, const u8* data) = 0;
// Called on any CP command.
// Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that
// primitive commands decode properly.
virtual void OnCP(u8 command, u32 value) = 0;
// Called on any BP command.
virtual void OnBP(u8 command, u32 value) = 0;
// Called on any indexed XF load command.
virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0;
// Called on any primitive command.
// vertex_data points at the first vertex; vertex_size is the size of one vertex in bytes.
virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size,
u16 num_vertices, const u8* vertex_data) = 0;
// Called on a display list.
virtual void OnDisplayList(u32 address, u32 size) = 0;
// Called on any NOP commands (which are all merged into a single call).
virtual void OnNop(u32 count) = 0;
// Called on an unknown opcode, or an opcode that is known but not implemented.
// data[0] is opcode.
virtual void OnUnknown(u8 opcode, const u8* data) = 0;
// Called on ANY command. The first byte of data is the opcode. Size will be at least 1.
// This function is called after one of the above functions is called.
virtual void OnCommand(const u8* data, u32 size) = 0;
// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0;
#endif
};
namespace detail
{
// Main logic; split so that the main RunCommand can call OnCommand with the returned size.
//
// Decodes at most one command starting at data[0] and dispatches it to the matching On* handler.
// Returns the number of bytes the command occupies, or 0 when `available` is too small to hold
// the whole command (no On* handler is invoked in that case). Multi-byte fields are read with
// Common::swap16 / swap24 / swap32.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
if (available < 1)
return 0;
const Opcode cmd = static_cast<Opcode>(data[0]);
switch (cmd)
{
case Opcode::GX_NOP:
{
// Consecutive NOP bytes are merged and reported through a single OnNop call.
u32 count = 1;
while (count < available && static_cast<Opcode>(data[count]) == Opcode::GX_NOP)
count++;
callback.OnNop(count);
return count;
}
case Opcode::GX_LOAD_CP_REG:
{
// Layout: opcode (1 byte), register byte, 32-bit value => 6 bytes.
if (available < 6)
return 0;
const u8 cmd2 = data[1];
const u32 value = Common::swap32(&data[2]);
callback.OnCP(cmd2, value);
return 6;
}
case Opcode::GX_LOAD_XF_REG:
{
// Layout: opcode (1 byte), 32-bit header, then stream_size 32-bit words.
// Header: low 16 bits = base address, upper 16 bits = word count minus one.
if (available < 5)
return 0;
const u32 cmd2 = Common::swap32(&data[1]);
const u16 base_address = cmd2 & 0xffff;
const u16 stream_size_temp = cmd2 >> 16;
// Only the low 4 bits of the count field are used below; anything larger is unexpected.
ASSERT(stream_size_temp < 16);
const u8 stream_size = (stream_size_temp & 0xf) + 1;
if (available < u32(5 + stream_size * 4))
return 0;
callback.OnXF(base_address, stream_size, &data[5]);
return 5 + stream_size * 4;
}
case Opcode::GX_LOAD_INDX_A: // Used for position matrices
case Opcode::GX_LOAD_INDX_B: // Used for normal matrices
case Opcode::GX_LOAD_INDX_C: // Used for postmatrices
case Opcode::GX_LOAD_INDX_D: // Used for lights
{
// Layout: opcode (1 byte), 32-bit value => 5 bytes.
// Value: bits 31..16 = index, bits 15..12 = size minus one, bits 11..0 = address.
if (available < 5)
return 0;
const u32 value = Common::swap32(&data[1]);
const u32 index = value >> 16;
const u16 address = value & 0xFFF; // TODO: check mask
const u8 size = ((value >> 12) & 0xF) + 1;
// Map the command byte to its ref array.
// GX_LOAD_INDX_A (32 = 8*4) . CPArray::XF_A (4+8 = 12)
// GX_LOAD_INDX_B (40 = 8*5) . CPArray::XF_B (5+8 = 13)
// GX_LOAD_INDX_C (48 = 8*6) . CPArray::XF_C (6+8 = 14)
// GX_LOAD_INDX_D (56 = 8*7) . CPArray::XF_D (7+8 = 15)
const auto ref_array = static_cast<CPArray>((static_cast<u8>(cmd) / 8) + 8);
callback.OnIndexedLoad(ref_array, index, address, size);
return 5;
}
case Opcode::GX_CMD_CALL_DL:
{
// Layout: opcode (1 byte), 32-bit address, 32-bit size => 9 bytes.
if (available < 9)
return 0;
const u32 address = Common::swap32(&data[1]);
const u32 size = Common::swap32(&data[5]);
callback.OnDisplayList(address, size);
return 9;
}
case Opcode::GX_LOAD_BP_REG:
{
// Layout: opcode (1 byte), register byte, 24-bit value => 5 bytes.
if (available < 5)
return 0;
const u8 cmd2 = data[1];
const u32 value = Common::swap24(&data[2]);
callback.OnBP(cmd2, value);
return 5;
}
default:
if (cmd >= Opcode::GX_PRIMITIVE_START && cmd <= Opcode::GX_PRIMITIVE_END)
{
// Layout: opcode (1 byte), 16-bit vertex count, then num_vertices vertices.
// The opcode byte itself carries the primitive type and the VAT index.
if (available < 3)
return 0;
const u8 cmdbyte = static_cast<u8>(cmd);
const OpcodeDecoder::Primitive primitive = static_cast<OpcodeDecoder::Primitive>(
(cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;
// The per-vertex size depends on the callback's current CP state, which is why CP writes
// must be applied to that state as they are decoded.
const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
callback.GetCPState().vtx_attr[vat]);
const u16 num_vertices = Common::swap16(&data[1]);
if (available < 3 + num_vertices * vertex_size)
return 0;
callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[3]);
return 3 + num_vertices * vertex_size;
}
}
// Reached for every opcode without a case above that is also outside the primitive range;
// it is reported as a one-byte command.
callback.OnUnknown(static_cast<u8>(cmd), data);
return 1;
}
} // namespace detail
// Decodes a single command from data. If a complete command was decoded, its raw bytes are
// additionally reported through callback.OnCommand. Returns the command's size in bytes, or 0
// if nothing was decoded.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
  const u32 consumed = detail::RunCommand(data, available, callback);
  if (consumed == 0)
    return 0;

  callback.OnCommand(data, consumed);
  return consumed;
}
// Decodes commands back to back until the buffer is exhausted or a command cannot be decoded
// from the bytes that remain. Returns the total number of bytes consumed.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback)
{
  u32 offset = 0;
  for (;;)
  {
    if (offset >= available)
      break;

    const u32 step = RunCommand(data + offset, available - offset, callback);
    // A zero-sized step means no further command could be decoded.
    if (step == 0)
      break;

    offset += step;
  }
  return offset;
}
template <bool is_preprocess = false> template <bool is_preprocess = false>
u8* Run(DataReader src, u32* cycles, bool in_display_list); u8* RunFifo(DataReader src, u32* cycles);
} // namespace OpcodeDecoder } // namespace OpcodeDecoder
// fmt formatter for OpcodeDecoder::Primitive, built on EnumFormatter.
// The names are listed in enum-value order starting from GX_DRAW_QUADS (0); GX_DRAW_POINTS is
// passed as the template argument, presumably as the last valid value -- confirm against
// EnumFormatter.
template <>
struct fmt::formatter<OpcodeDecoder::Primitive>
: EnumFormatter<OpcodeDecoder::Primitive::GX_DRAW_POINTS>
{
static constexpr array_type names = {
"GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)",
"GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP",
"GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES",
"GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS",
};
formatter() : EnumFormatter(names) {}
};

View File

@ -8,6 +8,7 @@
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h" #include "VideoCommon/BoundingBox.h"
@ -40,7 +41,7 @@ enum : u32
C_PENVCONST_END = C_EFBSCALE + 1 C_PENVCONST_END = C_EFBSCALE + 1
}; };
constexpr std::array<const char*, 32> tev_ksel_table_c{ constexpr Common::EnumMap<const char*, KonstSel::K3_A> tev_ksel_table_c{
"255,255,255", // 1 = 0x00 "255,255,255", // 1 = 0x00
"223,223,223", // 7_8 = 0x01 "223,223,223", // 7_8 = 0x01
"191,191,191", // 3_4 = 0x02 "191,191,191", // 3_4 = 0x02
@ -75,7 +76,7 @@ constexpr std::array<const char*, 32> tev_ksel_table_c{
I_KCOLORS "[3].aaa", // K3_A = 0x1F I_KCOLORS "[3].aaa", // K3_A = 0x1F
}; };
constexpr std::array<const char*, 32> tev_ksel_table_a{ constexpr Common::EnumMap<const char*, KonstSel::K3_A> tev_ksel_table_a{
"255", // 1 = 0x00 "255", // 1 = 0x00
"223", // 7_8 = 0x01 "223", // 7_8 = 0x01
"191", // 3_4 = 0x02 "191", // 3_4 = 0x02
@ -110,7 +111,7 @@ constexpr std::array<const char*, 32> tev_ksel_table_a{
I_KCOLORS "[3].a", // K3_A = 0x1F I_KCOLORS "[3].a", // K3_A = 0x1F
}; };
constexpr std::array<const char*, 16> tev_c_input_table{ constexpr Common::EnumMap<const char*, TevColorArg::Zero> tev_c_input_table{
"prev.rgb", // CPREV, "prev.rgb", // CPREV,
"prev.aaa", // APREV, "prev.aaa", // APREV,
"c0.rgb", // C0, "c0.rgb", // C0,
@ -129,7 +130,7 @@ constexpr std::array<const char*, 16> tev_c_input_table{
"int3(0,0,0)", // ZERO "int3(0,0,0)", // ZERO
}; };
constexpr std::array<const char*, 8> tev_a_input_table{ constexpr Common::EnumMap<const char*, TevAlphaArg::Zero> tev_a_input_table{
"prev.a", // APREV, "prev.a", // APREV,
"c0.a", // A0, "c0.a", // A0,
"c1.a", // A1, "c1.a", // A1,
@ -140,7 +141,7 @@ constexpr std::array<const char*, 8> tev_a_input_table{
"0", // ZERO "0", // ZERO
}; };
constexpr std::array<const char*, 8> tev_ras_table{ constexpr Common::EnumMap<const char*, RasColorChan::Zero> tev_ras_table{
"iround(col0 * 255.0)", "iround(col0 * 255.0)",
"iround(col1 * 255.0)", "iround(col1 * 255.0)",
"ERROR13", // 2 "ERROR13", // 2
@ -151,14 +152,14 @@ constexpr std::array<const char*, 8> tev_ras_table{
"int4(0, 0, 0, 0)", // zero "int4(0, 0, 0, 0)", // zero
}; };
constexpr std::array<const char*, 4> tev_c_output_table{ constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_c_output_table{
"prev.rgb", "prev.rgb",
"c0.rgb", "c0.rgb",
"c1.rgb", "c1.rgb",
"c2.rgb", "c2.rgb",
}; };
constexpr std::array<const char*, 4> tev_a_output_table{ constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_a_output_table{
"prev.a", "prev.a",
"c0.a", "c0.a",
"c1.a", "c1.a",
@ -1160,11 +1161,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac;
if (last_cc.dest != TevOutput::Prev) if (last_cc.dest != TevOutput::Prev)
{ {
out.Write("\tprev.rgb = {};\n", tev_c_output_table[u32(last_cc.dest.Value())]); out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
} }
if (last_ac.dest != TevOutput::Prev) if (last_ac.dest != TevOutput::Prev)
{ {
out.Write("\tprev.a = {};\n", tev_a_output_table[u32(last_ac.dest.Value())]); out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
} }
} }
out.Write("\tprev = prev & 255;\n"); out.Write("\tprev = prev & 255;\n");
@ -1277,6 +1278,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
APIType api_type, bool stereo) APIType api_type, bool stereo)
{ {
using Common::EnumMap;
const auto& stage = uid_data->stagehash[n]; const auto& stage = uid_data->stagehash[n];
out.Write("\n\t// TEV stage {}\n", n); out.Write("\n\t// TEV stage {}\n", n);
@ -1303,7 +1306,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// using iindtex{} as the offset coords // using iindtex{} as the offset coords
if (has_ind_stage && tevind.bs != IndTexBumpAlpha::Off) if (has_ind_stage && tevind.bs != IndTexBumpAlpha::Off)
{ {
static constexpr std::array<const char*, 4> tev_ind_alpha_sel{ static constexpr EnumMap<const char*, IndTexBumpAlpha::U> tev_ind_alpha_sel{
"", "",
"x", "x",
"y", "y",
@ -1316,16 +1319,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L3038-L3041 // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L3038-L3041
// https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L790-L800 // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L790-L800
static constexpr std::array<char, 4> tev_ind_alpha_shift{ static constexpr EnumMap<char, IndTexFormat::ITF_3> tev_ind_alpha_shift{
'0', // ITF_8: 0bXXXXXYYY -> 0bXXXXX000? No shift? '0', // ITF_8: 0bXXXXXYYY -> 0bXXXXX000? No shift?
'5', // ITF_5: 0bIIIIIAAA -> 0bAAA00000, shift of 5 '5', // ITF_5: 0bIIIIIAAA -> 0bAAA00000, shift of 5
'4', // ITF_4: 0bIIIIAAAA -> 0bAAAA0000, shift of 4 '4', // ITF_4: 0bIIIIAAAA -> 0bAAAA0000, shift of 4
'3', // ITF_3: 0bIIIAAAAA -> 0bAAAAA000, shift of 3 '3', // ITF_3: 0bIIIAAAAA -> 0bAAAAA000, shift of 3
}; };
out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt.Value(), out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt,
tev_ind_alpha_sel[u32(tevind.bs.Value())], tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_shift[tevind.fmt]);
tev_ind_alpha_shift[u32(tevind.fmt.Value())]);
} }
else else
{ {
@ -1335,23 +1337,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (has_ind_stage && tevind.matrix_index != IndMtxIndex::Off) if (has_ind_stage && tevind.matrix_index != IndMtxIndex::Off)
{ {
// format // format
static constexpr std::array<char, 4> tev_ind_fmt_shift{ static constexpr EnumMap<char, IndTexFormat::ITF_3> tev_ind_fmt_shift{
'0', // ITF_8: 0bXXXXXXXX -> 0bXXXXXXXX, no shift '0', // ITF_8: 0bXXXXXXXX -> 0bXXXXXXXX, no shift
'3', // ITF_5: 0bIIIIIAAA -> 0b000IIIII, shift of 3 '3', // ITF_5: 0bIIIIIAAA -> 0b000IIIII, shift of 3
'4', // ITF_4: 0bIIIIAAAA -> 0b0000IIII, shift of 4 '4', // ITF_4: 0bIIIIAAAA -> 0b0000IIII, shift of 4
'5', // ITF_3: 0bIIIAAAAA -> 0b00000III, shift of 5 '5', // ITF_3: 0bIIIAAAAA -> 0b00000III, shift of 5
}; };
out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt.Value(), out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt,
tev_ind_fmt_shift[u32(tevind.fmt.Value())]); tev_ind_fmt_shift[tevind.fmt]);
// bias - TODO: Check if this needs to be this complicated... // bias - TODO: Check if this needs to be this complicated...
// indexed by bias // indexed by bias
static constexpr std::array<const char*, 8> tev_ind_bias_field{ static constexpr EnumMap<const char*, IndTexBias::STU> tev_ind_bias_field{
"", "x", "y", "xy", "z", "xz", "yz", "xyz", "", "x", "y", "xy", "z", "xz", "yz", "xyz",
}; };
// indexed by fmt // indexed by fmt
static constexpr std::array<const char*, 4> tev_ind_bias_add{ static constexpr EnumMap<const char*, IndTexFormat::ITF_3> tev_ind_bias_add{
"-128", "-128",
"1", "1",
"1", "1",
@ -1361,22 +1363,19 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (tevind.bias == IndTexBias::S || tevind.bias == IndTexBias::T || if (tevind.bias == IndTexBias::S || tevind.bias == IndTexBias::T ||
tevind.bias == IndTexBias::U) tevind.bias == IndTexBias::U)
{ {
out.Write("\tiindtevcrd{}.{} += int({});\n", n, out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_field[u32(tevind.bias.Value())], tev_ind_bias_add[tevind.fmt]);
tev_ind_bias_add[u32(tevind.fmt.Value())]);
} }
else if (tevind.bias == IndTexBias::ST || tevind.bias == IndTexBias::SU || else if (tevind.bias == IndTexBias::ST || tevind.bias == IndTexBias::SU ||
tevind.bias == IndTexBias::TU_) tevind.bias == IndTexBias::TU_)
{ {
out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_field[u32(tevind.bias.Value())], tev_ind_bias_add[tevind.fmt]);
tev_ind_bias_add[u32(tevind.fmt.Value())]);
} }
else if (tevind.bias == IndTexBias::STU) else if (tevind.bias == IndTexBias::STU)
{ {
out.Write("\tiindtevcrd{0}.{1} += int3({2}, {2}, {2});\n", n, out.Write("\tiindtevcrd{0}.{1} += int3({2}, {2}, {2});\n", n,
tev_ind_bias_field[u32(tevind.bias.Value())], tev_ind_bias_field[tevind.bias], tev_ind_bias_add[tevind.fmt]);
tev_ind_bias_add[u32(tevind.fmt.Value())]);
} }
// Multiplied by 2 because each matrix has two rows. // Multiplied by 2 because each matrix has two rows.
@ -1535,7 +1534,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0', '\0',
}; };
out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap); out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap);
} }
if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0) if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0)
@ -1567,8 +1566,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
cc.d == TevColorArg::Konst || ac.a == TevAlphaArg::Konst || ac.b == TevAlphaArg::Konst || cc.d == TevColorArg::Konst || ac.a == TevAlphaArg::Konst || ac.b == TevAlphaArg::Konst ||
ac.c == TevAlphaArg::Konst || ac.d == TevAlphaArg::Konst) ac.c == TevAlphaArg::Konst || ac.d == TevAlphaArg::Konst)
{ {
out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[u32(stage.tevksel_kc)], out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc],
tev_ksel_table_a[u32(stage.tevksel_ka)]); tev_ksel_table_a[stage.tevksel_ka]);
if (u32(stage.tevksel_kc) > 7) if (u32(stage.tevksel_kc) > 7)
{ {
@ -1599,51 +1598,50 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VECTOR_BITWISE_AND)) if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VECTOR_BITWISE_AND))
{ {
out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.a.Value())], out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.a],
tev_a_input_table[u32(ac.a.Value())]); tev_a_input_table[ac.a]);
out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.b.Value())], out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.b],
tev_a_input_table[u32(ac.b.Value())]); tev_a_input_table[ac.b]);
out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.c.Value())], out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.c],
tev_a_input_table[u32(ac.c.Value())]); tev_a_input_table[ac.c]);
} }
else else
{ {
out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a],
tev_c_input_table[u32(cc.a.Value())], tev_a_input_table[u32(ac.a.Value())]); tev_a_input_table[ac.a]);
out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b],
tev_c_input_table[u32(cc.b.Value())], tev_a_input_table[u32(ac.b.Value())]); tev_a_input_table[ac.b]);
out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c],
tev_c_input_table[u32(cc.c.Value())], tev_a_input_table[u32(ac.c.Value())]); tev_a_input_table[ac.c]);
} }
out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[u32(cc.d.Value())], out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]);
tev_a_input_table[u32(ac.d.Value())]);
out.Write("\t// color combine\n"); out.Write("\t// color combine\n");
out.Write("\t{} = clamp(", tev_c_output_table[u32(cc.dest.Value())]); out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]);
if (cc.bias != TevBias::Compare) if (cc.bias != TevBias::Compare)
{ {
WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false); WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false);
} }
else else
{ {
static constexpr std::array<const char*, 8> function_table{ static constexpr EnumMap<const char*, TevCompareMode::RGB8> tev_rgb_comparison_gt{
"((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8, GT "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8
"((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // R8, TevComparison::EQ "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : " "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24
"int3(0,0,0))", // GR16, GT "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8
"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : "
"int3(0,0,0))", // GR16, EQ
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : "
"int3(0,0,0))", // BGR24, GT
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : "
"int3(0,0,0))", // BGR24, EQ
"(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8, GT
"((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8, EQ
}; };
const u32 mode = (u32(cc.compare_mode.Value()) << 1) | u32(cc.comparison.Value()); static constexpr EnumMap<const char*, TevCompareMode::RGB8> tev_rgb_comparison_eq{
out.Write(" tevin_d.rgb + "); "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0))", // TevCompareMode::R8
out.Write("{}", function_table[mode]); "((idot(tevin_a.rgb,comp16) == idot(tevin_b.rgb,comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16
"((idot(tevin_a.rgb,comp24) == idot(tevin_b.rgb,comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24
"((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8
};
if (cc.comparison == TevComparison::EQ)
out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_eq[cc.compare_mode]);
else
out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_gt[cc.compare_mode]);
} }
if (cc.clamp) if (cc.clamp)
out.Write(", int3(0,0,0), int3(255,255,255))"); out.Write(", int3(0,0,0), int3(255,255,255))");
@ -1652,27 +1650,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write(";\n"); out.Write(";\n");
out.Write("\t// alpha combine\n"); out.Write("\t// alpha combine\n");
out.Write("\t{} = clamp(", tev_a_output_table[u32(ac.dest.Value())]); out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]);
if (ac.bias != TevBias::Compare) if (ac.bias != TevBias::Compare)
{ {
WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true); WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true);
} }
else else
{ {
static constexpr std::array<const char*, 8> function_table{ static constexpr EnumMap<const char*, TevCompareMode::A8> tev_a_comparison_gt{
"((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8, GT "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8
"((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // R8, TevComparison::EQ "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, GT "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24
"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, EQ "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, GT
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, EQ
"((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8, GT
"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // A8, EQ
}; };
const u32 mode = (u32(ac.compare_mode.Value()) << 1) | u32(ac.comparison.Value()); static constexpr EnumMap<const char*, TevCompareMode::A8> tev_a_comparison_eq{
out.Write(" tevin_d.a + "); "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8
out.Write("{}", function_table[mode]); "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16,
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24,
"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)", // A8
};
if (ac.comparison == TevComparison::EQ)
out.Write(" tevin_d.a + {}", tev_a_comparison_eq[ac.compare_mode]);
else
out.Write(" tevin_d.a + {}", tev_a_comparison_gt[ac.compare_mode]);
} }
if (ac.clamp) if (ac.clamp)
out.Write(", 0, 255)"); out.Write(", 0, 255)");
@ -1685,36 +1687,33 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
bool clamp, TevScale scale, bool alpha) bool clamp, TevScale scale, bool alpha)
{ {
static constexpr std::array<const char*, 4> tev_scale_table_left{ static constexpr Common::EnumMap<const char*, TevScale::Divide2> tev_scale_table_left{
"", // Scale1 "", // Scale1
" << 1", // Scale2 " << 1", // Scale2
" << 2", // Scale4 " << 2", // Scale4
"", // Divide2 "", // Divide2
}; };
static constexpr std::array<const char*, 4> tev_scale_table_right{ static constexpr Common::EnumMap<const char*, TevScale::Divide2> tev_scale_table_right{
"", // Scale1 "", // Scale1
"", // Scale2 "", // Scale2
"", // Scale4 "", // Scale4
" >> 1", // Divide2 " >> 1", // Divide2
}; };
// indexed by 2*op+(scale==Divide2) static constexpr Common::EnumMap<const char*, TevOp::Sub> tev_lerp_bias{
static constexpr std::array<const char*, 4> tev_lerp_bias{
"",
" + 128", " + 128",
"",
" + 127", " + 127",
}; };
static constexpr std::array<const char*, 4> tev_bias_table{ static constexpr Common::EnumMap<const char*, TevBias::Compare> tev_bias_table{
"", // Zero, "", // Zero,
" + 128", // AddHalf, " + 128", // AddHalf,
" - 128", // SubHalf, " - 128", // SubHalf,
"", "",
}; };
static constexpr std::array<char, 2> tev_op_table{ static constexpr Common::EnumMap<char, TevOp::Sub> tev_op_table{
'+', // TevOp::Add = 0, '+', // TevOp::Add = 0,
'-', // TevOp::Sub = 1, '-', // TevOp::Sub = 1,
}; };
@ -1724,17 +1723,16 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
// - a rounding bias is added before dividing by 256 // - a rounding bias is added before dividing by 256
out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[u32(bias)], out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[scale]);
tev_scale_table_left[u32(scale)]); out.Write(" {} ", tev_op_table[op]);
out.Write(" {} ", tev_op_table[u32(op)]); out.Write("(((((tevin_a.{0}<<8) + "
out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)", "(tevin_b.{0}-tevin_a.{0})*(tevin_c.{0}+(tevin_c.{0}>>7))){1}){2})>>8)",
components, components, components, components, components, components, tev_scale_table_left[scale],
tev_scale_table_left[u32(scale)], ((scale == TevScale::Divide2) == alpha) ? tev_lerp_bias[op] : "");
tev_lerp_bias[2 * u32(op) + ((scale == TevScale::Divide2) == alpha)]); out.Write("){}", tev_scale_table_right[scale]);
out.Write("){}", tev_scale_table_right[u32(scale)]);
} }
constexpr std::array<const char*, 8> tev_alpha_funcs_table{ constexpr Common::EnumMap<const char*, CompareMode::Always> tev_alpha_funcs_table{
"(false)", // CompareMode::Never "(false)", // CompareMode::Never
"(prev.a < {})", // CompareMode::Less "(prev.a < {})", // CompareMode::Less
"(prev.a == {})", // CompareMode::Equal "(prev.a == {})", // CompareMode::Equal
@ -1745,7 +1743,7 @@ constexpr std::array<const char*, 8> tev_alpha_funcs_table{
"(true)" // CompareMode::Always "(true)" // CompareMode::Always
}; };
constexpr std::array<const char*, 4> tev_alpha_funclogic_table{ constexpr Common::EnumMap<const char*, AlphaTestOp::Xnor> tev_alpha_funclogic_table{
" && ", // and " && ", // and
" || ", // or " || ", // or
" != ", // xor " != ", // xor
@ -1763,9 +1761,9 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
const auto write_alpha_func = [&out](CompareMode mode, std::string_view ref) { const auto write_alpha_func = [&out](CompareMode mode, std::string_view ref) {
const bool has_no_arguments = mode == CompareMode::Never || mode == CompareMode::Always; const bool has_no_arguments = mode == CompareMode::Never || mode == CompareMode::Always;
if (has_no_arguments) if (has_no_arguments)
out.Write("{}", tev_alpha_funcs_table[u32(mode)]); out.Write("{}", tev_alpha_funcs_table[mode]);
else else
out.Write(tev_alpha_funcs_table[u32(mode)], ref); out.Write(tev_alpha_funcs_table[mode], ref);
}; };
out.SetConstantsUsed(C_ALPHA, C_ALPHA); out.SetConstantsUsed(C_ALPHA, C_ALPHA);
@ -1779,7 +1777,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
write_alpha_func(uid_data->alpha_test_comp0, alpha_ref[0]); write_alpha_func(uid_data->alpha_test_comp0, alpha_ref[0]);
// Lookup the logic op // Lookup the logic op
out.Write("{}", tev_alpha_funclogic_table[u32(uid_data->alpha_test_logic)]); out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]);
// Lookup the second component from the alpha function table // Lookup the second component from the alpha function table
write_alpha_func(uid_data->alpha_test_comp1, alpha_ref[1]); write_alpha_func(uid_data->alpha_test_comp1, alpha_ref[1]);
@ -1809,7 +1807,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
out.Write("\t}}\n"); out.Write("\t}}\n");
} }
constexpr std::array<const char*, 8> tev_fog_funcs_table{ constexpr Common::EnumMap<const char*, FogType::BackwardsExpSq> tev_fog_funcs_table{
"", // No Fog "", // No Fog
"", // ? "", // ?
"", // Linear "", // Linear
@ -1866,7 +1864,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
if (uid_data->fog_fsel >= FogType::Exp) if (uid_data->fog_fsel >= FogType::Exp)
{ {
out.Write("{}", tev_fog_funcs_table[u32(uid_data->fog_fsel)]); out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]);
} }
else else
{ {
@ -1919,7 +1917,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{ {
if (uid_data->blend_enable) if (uid_data->blend_enable)
{ {
static constexpr std::array<const char*, 8> blend_src_factor{ using Common::EnumMap;
static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor{
"float3(0,0,0);", // ZERO "float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE "float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR "initial_ocol0.rgb;", // DSTCLR
@ -1929,7 +1928,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.aaa;", // DSTALPHA "initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
}; };
static constexpr std::array<const char*, 8> blend_src_factor_alpha{ static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor_alpha{
"0.0;", // ZERO "0.0;", // ZERO
"1.0;", // ONE "1.0;", // ONE
"initial_ocol0.a;", // DSTCLR "initial_ocol0.a;", // DSTCLR
@ -1939,7 +1938,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.a;", // DSTALPHA "initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA "1.0 - initial_ocol0.a;", // INVDSTALPHA
}; };
static constexpr std::array<const char*, 8> blend_dst_factor{ static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor{
"float3(0,0,0);", // ZERO "float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE "float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR "ocol0.rgb;", // SRCCLR
@ -1949,7 +1948,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.aaa;", // DSTALPHA "initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
}; };
static constexpr std::array<const char*, 8> blend_dst_factor_alpha{ static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor_alpha{
"0.0;", // ZERO "0.0;", // ZERO
"1.0;", // ONE "1.0;", // ONE
"ocol0.a;", // SRCCLR "ocol0.a;", // SRCCLR
@ -1960,13 +1959,11 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"1.0 - initial_ocol0.a;", // INVDSTALPHA "1.0 - initial_ocol0.a;", // INVDSTALPHA
}; };
out.Write("\tfloat4 blend_src;\n"); out.Write("\tfloat4 blend_src;\n");
out.Write("\tblend_src.rgb = {}\n", blend_src_factor[u32(uid_data->blend_src_factor)]); out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]);
out.Write("\tblend_src.a = {}\n", out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]);
blend_src_factor_alpha[u32(uid_data->blend_src_factor_alpha)]);
out.Write("\tfloat4 blend_dst;\n"); out.Write("\tfloat4 blend_dst;\n");
out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[u32(uid_data->blend_dst_factor)]); out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]);
out.Write("\tblend_dst.a = {}\n", out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]);
blend_dst_factor_alpha[u32(uid_data->blend_dst_factor_alpha)]);
out.Write("\tfloat4 blend_result;\n"); out.Write("\tfloat4 blend_result;\n");
if (uid_data->blend_subtract) if (uid_data->blend_subtract)

View File

@ -963,7 +963,7 @@ void Renderer::RecordVideoMemory()
const u32* xfregs_ptr = reinterpret_cast<const u32*>(&xfmem) + FifoDataFile::XF_MEM_SIZE; const u32* xfregs_ptr = reinterpret_cast<const u32*>(&xfmem) + FifoDataFile::XF_MEM_SIZE;
u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE; u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE;
FillCPMemoryArray(cpmem); g_main_cp_state.FillCPMemoryArray(cpmem);
FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size, FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size,
texMem); texMem);
@ -986,9 +986,9 @@ bool Renderer::InitializeImGui()
ImGui::GetStyle().WindowRounding = 7.0f; ImGui::GetStyle().WindowRounding = 7.0f;
PortableVertexDeclaration vdecl = {}; PortableVertexDeclaration vdecl = {};
vdecl.position = {VAR_FLOAT, 2, offsetof(ImDrawVert, pos), true, false}; vdecl.position = {ComponentFormat::Float, 2, offsetof(ImDrawVert, pos), true, false};
vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false}; vdecl.texcoords[0] = {ComponentFormat::Float, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false}; vdecl.colors[0] = {ComponentFormat::UByte, 4, offsetof(ImDrawVert, col), true, false};
vdecl.stride = sizeof(ImDrawVert); vdecl.stride = sizeof(ImDrawVert);
m_imgui_vertex_format = CreateNativeVertexFormat(vdecl); m_imgui_vertex_format = CreateNativeVertexFormat(vdecl);
if (!m_imgui_vertex_format) if (!m_imgui_vertex_format)

View File

@ -1095,7 +1095,7 @@ void ShaderCache::QueueUberShaderPipelines()
// All attributes will be enabled in GetUberVertexFormat. // All attributes will be enabled in GetUberVertexFormat.
PortableVertexDeclaration dummy_vertex_decl = {}; PortableVertexDeclaration dummy_vertex_decl = {};
dummy_vertex_decl.position.components = 4; dummy_vertex_decl.position.components = 4;
dummy_vertex_decl.position.type = VAR_FLOAT; dummy_vertex_decl.position.type = ComponentFormat::Float;
dummy_vertex_decl.position.enable = true; dummy_vertex_decl.position.enable = true;
dummy_vertex_decl.stride = sizeof(float) * 4; dummy_vertex_decl.stride = sizeof(float) * 4;
NativeVertexFormat* dummy_vertex_format = NativeVertexFormat* dummy_vertex_format =

View File

@ -13,10 +13,11 @@
#include "Common/BitField.h" #include "Common/BitField.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
#include "Common/TypeUtils.h" #include "Common/TypeUtils.h"
enum class APIType; #include "VideoCommon/VideoCommon.h"
/** /**
* Common interface for classes that need to go through the shader generation path * Common interface for classes that need to go through the shader generation path
@ -210,6 +211,64 @@ std::string BitfieldExtract(std::string_view source)
static_cast<u32>(BitFieldT::NumBits())); static_cast<u32>(BitFieldT::NumBits()));
} }
// Writes shader code that picks one snippet out of `values` according to the runtime value of the
// shader expression `variable`.
//
// - When ApiType is APIType::D3D, a switch statement is written with one case for every enum
//   member from 0 through last_member. Case labels are produced with the 's' enum format, the
//   member's snippet follows when it is non-empty, and "break;" is appended when break_ is true.
// - For every other API, a binary tree of "if (variable < N) {...} else {...}" statements is
//   written instead, with one leaf per enum member. break_ is not used in this form.
//
// `indent` is the width of the padding written in front of the outermost generated lines; nested
// if-tree levels are padded by two more columns each.
template <auto last_member, typename = decltype(last_member)>
void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
                 const Common::EnumMap<std::string_view, last_member>& values, int indent,
                 bool break_)
{
  // The second template argument is needed to avoid compile errors from ambiguity with multiple
  // enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW
  // and https://godbolt.org/z/hz7Yqq1P5
  using enum_type = decltype(last_member);
  const u32 last_index = static_cast<u32>(last_member);

  // Throughout, {:{}} pads a line by formatting an empty string with a variable width.
  if (ApiType != APIType::D3D)
  {
    // Recursively emit a tree of if statements covering first <= x < limit.
    // A std::function is required here: a lambda held in an auto variable cannot refer to itself
    // before its own initialization has finished.
    std::function<void(u32, u32, u32)> emit_range = [&](u32 depth, u32 first, u32 limit) {
      const bool is_leaf = (limit == first + 1);
      if (!is_leaf)
      {
        // Halve the range and recurse into both sides.
        const u32 split = first + ((limit - first) / 2);
        out.Write("{:{}}if ({} < {}u) {{\n", "", depth, variable, split);
        emit_range(depth + 2, first, split);
        out.Write("{:{}}}} else {{\n", "", depth);
        emit_range(depth + 2, split, limit);
        out.Write("{:{}}}}\n", "", depth);
        return;
      }

      // Exactly one candidate remains (first <= x < first + 1 means x == first).
      const enum_type member = static_cast<enum_type>(first);
      // Note that this indentation behaves poorly for multi-line code
      out.Write("{:{}}{} // {}\n", "", depth, values[member], member);
    };

    emit_range(indent, 0, last_index + 1);
    return;
  }

  out.Write("{:{}}switch ({}) {{\n", "", indent, variable);
  for (u32 idx = 0; idx <= last_index; ++idx)
  {
    const enum_type member = static_cast<enum_type>(idx);
    const auto& snippet = values[member];

    // Assumes existence of an EnumFormatter
    out.Write("{:{}}case {:s}:\n", "", indent, member);
    // Note that this indentation behaves poorly for multi-line code
    if (!snippet.empty())
      out.Write("{:{}} {}\n", "", indent, snippet);
    if (break_)
      out.Write("{:{}} break;\n", "", indent);
  }
  out.Write("{:{}}}}\n", "", indent);
}
// Constant variable names // Constant variable names
#define I_COLORS "color" #define I_COLORS "color"
#define I_KCOLORS "k" #define I_KCOLORS "k"

View File

@ -1300,42 +1300,30 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur
// Search the texture cache for textures by address // Search the texture cache for textures by address
// //
// Find all texture cache entries for the current texture address, and decide whether to use one // Find all texture cache entries for the current texture address, and decide whether to use one
// of // of them, or to create a new one
// them, or to create a new one
// //
// In most cases, the fastest way is to use only one texture cache entry for the same address. // In most cases, the fastest way is to use only one texture cache entry for the same address.
// Usually, // Usually, when a texture changes, the old version of the texture is unlikely to be used again.
// when a texture changes, the old version of the texture is unlikely to be used again. If there // If there were new cache entries created for normal texture updates, there would be a slowdown
// were // due to a huge amount of unused cache entries. Also thanks to texture pooling, overwriting an
// new cache entries created for normal texture updates, there would be a slowdown due to a huge // existing cache entry is faster than creating a new one from scratch.
// amount
// of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is
// faster than creating a new one from scratch.
// //
// Some games use the same address for different textures though. If the same cache entry was used // Some games use the same address for different textures though. If the same cache entry was used
// in // in this case, it would be constantly overwritten, and effectively there wouldn't be any caching
// this case, it would be constantly overwritten, and effectively there wouldn't be any caching // for those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has
// for // multiple sets of fonts on each other stored in a single texture and uses the palette to make
// those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has // different characters visible or invisible. In Castlevania 3 some textures are used for 2
// multiple // different things or at least in 2 different ways (size 1024x1024 vs 1024x256).
// sets of fonts on each other stored in a single texture and uses the palette to make different
// characters visible or invisible. In Castlevania 3 some textures are used for 2 different things
// or
// at least in 2 different ways(size 1024x1024 vs 1024x256).
// //
// To determine whether to use multiple cache entries or a single entry, use the following // To determine whether to use multiple cache entries or a single entry, use the following
// heuristic: // heuristic: If the same texture address is used several times during the same frame, assume the
// If the same texture address is used several times during the same frame, assume the address is // address is used for different purposes and allow creating an additional cache entry. If there's
// used // at least one entry that hasn't been used for the same frame, then overwrite it, in order to
// for different purposes and allow creating an additional cache entry. If there's at least one // keep the cache as small as possible. If the current texture is found in the cache, use that
// entry // entry.
// that hasn't been used for the same frame, then overwrite it, in order to keep the cache as
// small as
// possible. If the current texture is found in the cache, use that entry.
// //
// For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else // For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else
// it was // it was done in vain.
// done in vain.
auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress()); auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress());
TexAddrCache::iterator iter = iter_range.first; TexAddrCache::iterator iter = iter_range.first;
TexAddrCache::iterator oldest_entry = iter; TexAddrCache::iterator oldest_entry = iter;

View File

@ -404,263 +404,95 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
"int4 getKonstColor(State s, StageState ss);\n" "int4 getKonstColor(State s, StageState ss);\n"
"\n"); "\n");
// The switch statements in these functions appear to get transformed into an if..else chain static constexpr Common::EnumMap<std::string_view, CompareMode::Always> tev_alpha_funcs_table{
// on NVIDIA's OpenGL/Vulkan drivers, resulting in lower performance than the D3D counterparts. "return false;", // CompareMode::Never
// Transforming the switch into a binary tree of ifs can increase performance by up to 20%. "return a < b;", // CompareMode::Less
if (api_type == APIType::D3D) "return a == b;", // CompareMode::Equal
{ "return a <= b;", // CompareMode::LEqual
out.Write("// Helper function for Alpha Test\n" "return a > b;", // CompareMode::Greater
"bool alphaCompare(int a, int b, uint compare) {{\n" "return a != b;", // CompareMode::NEqual
" switch (compare) {{\n" "return a >= b;", // CompareMode::GEqual
" case 0u: // NEVER\n" "return true;" // CompareMode::Always
" return false;\n" };
" case 1u: // LESS\n"
" return a < b;\n" static constexpr Common::EnumMap<std::string_view, TevColorArg::Zero> tev_c_input_table{
" case 2u: // EQUAL\n" "return s.Reg[0].rgb;", // CPREV,
" return a == b;\n" "return s.Reg[0].aaa;", // APREV,
" case 3u: // LEQUAL\n" "return s.Reg[1].rgb;", // C0,
" return a <= b;\n" "return s.Reg[1].aaa;", // A0,
" case 4u: // GREATER\n" "return s.Reg[2].rgb;", // C1,
" return a > b;\n" "return s.Reg[2].aaa;", // A1,
" case 5u: // NEQUAL;\n" "return s.Reg[3].rgb;", // C2,
" return a != b;\n" "return s.Reg[3].aaa;", // A2,
" case 6u: // GEQUAL\n" "return s.TexColor.rgb;", // TEXC,
" return a >= b;\n" "return s.TexColor.aaa;", // TEXA,
" case 7u: // ALWAYS\n" "return getRasColor(s, ss, colors_0, colors_1).rgb;", // RASC,
" return true;\n" "return getRasColor(s, ss, colors_0, colors_1).aaa;", // RASA,
" }}\n" "return int3(255, 255, 255);", // ONE
"}}\n" "return int3(128, 128, 128);", // HALF
"\n" "return getKonstColor(s, ss).rgb;", // KONST
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " "return int3(0, 0, 0);", // ZERO
"uint index) {{\n" };
" switch (index) {{\n"
" case 0u: // prev.rgb\n" static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_table{
" return s.Reg[0].rgb;\n" "return s.Reg[0].a;", // APREV,
" case 1u: // prev.aaa\n" "return s.Reg[1].a;", // A0,
" return s.Reg[0].aaa;\n" "return s.Reg[2].a;", // A1,
" case 2u: // c0.rgb\n" "return s.Reg[3].a;", // A2,
" return s.Reg[1].rgb;\n" "return s.TexColor.a;", // TEXA,
" case 3u: // c0.aaa\n" "return getRasColor(s, ss, colors_0, colors_1).a;", // RASA,
" return s.Reg[1].aaa;\n" "return getKonstColor(s, ss).a;", // KONST, (hw1 had quarter)
" case 4u: // c1.rgb\n" "return 0;", // ZERO
" return s.Reg[2].rgb;\n" };
" case 5u: // c1.aaa\n"
" return s.Reg[2].aaa;\n" static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_regs_lookup_table{
" case 6u: // c2.rgb\n" "return s.Reg[0];",
" return s.Reg[3].rgb;\n" "return s.Reg[1];",
" case 7u: // c2.aaa\n" "return s.Reg[2];",
" return s.Reg[3].aaa;\n" "return s.Reg[3];",
" case 8u:\n" };
" return s.TexColor.rgb;\n"
" case 9u:\n" static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_c_set_table{
" return s.TexColor.aaa;\n" "s.Reg[0].rgb = color;",
" case 10u:\n" "s.Reg[1].rgb = color;",
" return getRasColor(s, ss, colors_0, colors_1).rgb;\n" "s.Reg[2].rgb = color;",
" case 11u:\n" "s.Reg[3].rgb = color;",
" return getRasColor(s, ss, colors_0, colors_1).aaa;\n" };
" case 12u: // One\n"
" return int3(255, 255, 255);\n" static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_a_set_table{
" case 13u: // Half\n" "s.Reg[0].a = alpha;",
" return int3(128, 128, 128);\n" "s.Reg[1].a = alpha;",
" case 14u:\n" "s.Reg[2].a = alpha;",
" return getKonstColor(s, ss).rgb;\n" "s.Reg[3].a = alpha;",
" case 15u: // Zero\n" };
" return int3(0, 0, 0);\n"
" }}\n" out.Write("// Helper function for Alpha Test\n"
"}}\n" "bool alphaCompare(int a, int b, uint compare) {{\n");
"\n" WriteSwitch(out, api_type, "compare", tev_alpha_funcs_table, 2, false);
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " out.Write("}}\n"
"uint index) {{\n" "\n"
" switch (index) {{\n" "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
" case 0u: // prev.a\n" "uint index) {{\n");
" return s.Reg[0].a;\n" WriteSwitch(out, api_type, "index", tev_c_input_table, 2, false);
" case 1u: // c0.a\n" out.Write("}}\n"
" return s.Reg[1].a;\n" "\n"
" case 2u: // c1.a\n" "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
" return s.Reg[2].a;\n" "uint index) {{\n");
" case 3u: // c2.a\n" WriteSwitch(out, api_type, "index", tev_a_input_table, 2, false);
" return s.Reg[3].a;\n" out.Write("}}\n"
" case 4u:\n" "\n"
" return s.TexColor.a;\n" "int4 getTevReg(in State s, uint index) {{\n");
" case 5u:\n" WriteSwitch(out, api_type, "index", tev_regs_lookup_table, 2, false);
" return getRasColor(s, ss, colors_0, colors_1).a;\n" out.Write("}}\n"
" case 6u:\n" "\n"
" return getKonstColor(s, ss).a;\n" "void setRegColor(inout State s, uint index, int3 color) {{\n");
" case 7u: // Zero\n" WriteSwitch(out, api_type, "index", tev_c_set_table, 2, true);
" return 0;\n" out.Write("}}\n"
" }}\n" "\n"
"}}\n" "void setRegAlpha(inout State s, uint index, int alpha) {{\n");
"\n" WriteSwitch(out, api_type, "index", tev_a_set_table, 2, true);
"int4 getTevReg(in State s, uint index) {{\n" out.Write("}}\n"
" switch (index) {{\n" "\n");
" case 0u: // prev\n"
" return s.Reg[0];\n"
" case 1u: // c0\n"
" return s.Reg[1];\n"
" case 2u: // c1\n"
" return s.Reg[2];\n"
" case 3u: // c2\n"
" return s.Reg[3];\n"
" default: // prev\n"
" return s.Reg[0];\n"
" }}\n"
"}}\n"
"\n"
"void setRegColor(inout State s, uint index, int3 color) {{\n"
" switch (index) {{\n"
" case 0u: // prev\n"
" s.Reg[0].rgb = color;\n"
" break;\n"
" case 1u: // c0\n"
" s.Reg[1].rgb = color;\n"
" break;\n"
" case 2u: // c1\n"
" s.Reg[2].rgb = color;\n"
" break;\n"
" case 3u: // c2\n"
" s.Reg[3].rgb = color;\n"
" break;\n"
" }}\n"
"}}\n"
"\n"
"void setRegAlpha(inout State s, uint index, int alpha) {{\n"
" switch (index) {{\n"
" case 0u: // prev\n"
" s.Reg[0].a = alpha;\n"
" break;\n"
" case 1u: // c0\n"
" s.Reg[1].a = alpha;\n"
" break;\n"
" case 2u: // c1\n"
" s.Reg[2].a = alpha;\n"
" break;\n"
" case 3u: // c2\n"
" s.Reg[3].a = alpha;\n"
" break;\n"
" }}\n"
"}}\n"
"\n");
}
else
{
out.Write(
"// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n"
" if (compare < 4u) {{\n"
" if (compare < 2u) {{\n"
" return (compare == 0u) ? (false) : (a < b);\n"
" }} else {{\n"
" return (compare == 2u) ? (a == b) : (a <= b);\n"
" }}\n"
" }} else {{\n"
" if (compare < 6u) {{\n"
" return (compare == 4u) ? (a > b) : (a != b);\n"
" }} else {{\n"
" return (compare == 6u) ? (a >= b) : (true);\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" if (index < 8u) {{\n"
" if (index < 4u) {{\n"
" if (index < 2u) {{\n"
" return (index == 0u) ? s.Reg[0].rgb : s.Reg[0].aaa;\n"
" }} else {{\n"
" return (index == 2u) ? s.Reg[1].rgb : s.Reg[1].aaa;\n"
" }}\n"
" }} else {{\n"
" if (index < 6u) {{\n"
" return (index == 4u) ? s.Reg[2].rgb : s.Reg[2].aaa;\n"
" }} else {{\n"
" return (index == 6u) ? s.Reg[3].rgb : s.Reg[3].aaa;\n"
" }}\n"
" }}\n"
" }} else {{\n"
" if (index < 12u) {{\n"
" if (index < 10u) {{\n"
" return (index == 8u) ? s.TexColor.rgb : s.TexColor.aaa;\n"
" }} else {{\n"
" int4 ras = getRasColor(s, ss, colors_0, colors_1);\n"
" return (index == 10u) ? ras.rgb : ras.aaa;\n"
" }}\n"
" }} else {{\n"
" if (index < 14u) {{\n"
" return (index == 12u) ? int3(255, 255, 255) : int3(128, 128, 128);\n"
" }} else {{\n"
" return (index == 14u) ? getKonstColor(s, ss).rgb : int3(0, 0, 0);\n"
" }}\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" if (index < 4u) {{\n"
" if (index < 2u) {{\n"
" return (index == 0u) ? s.Reg[0].a : s.Reg[1].a;\n"
" }} else {{\n"
" return (index == 2u) ? s.Reg[2].a : s.Reg[3].a;\n"
" }}\n"
" }} else {{\n"
" if (index < 6u) {{\n"
" return (index == 4u) ? s.TexColor.a : getRasColor(s, ss, colors_0, colors_1).a;\n"
" }} else {{\n"
" return (index == 6u) ? getKonstColor(s, ss).a : 0;\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int4 getTevReg(in State s, uint index) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" return s.Reg[0];\n"
" }} else {{\n"
" return s.Reg[1];\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" return s.Reg[2];\n"
" }} else {{\n"
" return s.Reg[3];\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"void setRegColor(inout State s, uint index, int3 color) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" s.Reg[0].rgb = color;\n"
" }} else {{\n"
" s.Reg[1].rgb = color;\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" s.Reg[2].rgb = color;\n"
" }} else {{\n"
" s.Reg[3].rgb = color;\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"void setRegAlpha(inout State s, uint index, int alpha) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" s.Reg[0].a = alpha;\n"
" }} else {{\n"
" s.Reg[1].a = alpha;\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" s.Reg[2].a = alpha;\n"
" }} else {{\n"
" s.Reg[3].a = alpha;\n"
" }}\n"
" }}\n"
"}}\n"
"\n");
}
// Since the fixed-point texture coodinate variables aren't global, we need to pass // Since the fixed-point texture coodinate variables aren't global, we need to pass
// them to the select function. This applies to all backends. // them to the select function. This applies to all backends.
@ -1284,78 +1116,59 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
if (use_shader_blend) if (use_shader_blend)
{ {
static constexpr std::array<std::string_view, 8> blendSrcFactor{{ using Common::EnumMap;
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactor{
"initial_ocol0.rgb;", // DSTCLR "blend_src.rgb = float3(0,0,0);", // ZERO
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR "blend_src.rgb = float3(1,1,1);", // ONE
"ocol1.aaa;", // SRCALPHA "blend_src.rgb = initial_ocol0.rgb;", // DSTCLR
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA "blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"initial_ocol0.aaa;", // DSTALPHA "blend_src.rgb = ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA "blend_src.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
}}; "blend_src.rgb = initial_ocol0.aaa;", // DSTALPHA
static constexpr std::array<std::string_view, 8> blendSrcFactorAlpha{{ "blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
"0.0;", // ZERO };
"1.0;", // ONE static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactorAlpha{
"initial_ocol0.a;", // DSTCLR "blend_src.a = 0.0;", // ZERO
"1.0 - initial_ocol0.a;", // INVDSTCLR "blend_src.a = 1.0;", // ONE
"ocol1.a;", // SRCALPHA "blend_src.a = initial_ocol0.a;", // DSTCLR
"1.0 - ocol1.a;", // INVSRCALPHA "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTCLR
"initial_ocol0.a;", // DSTALPHA "blend_src.a = ocol1.a;", // SRCALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA "blend_src.a = 1.0 - ocol1.a;", // INVSRCALPHA
}}; "blend_src.a = initial_ocol0.a;", // DSTALPHA
static constexpr std::array<std::string_view, 8> blendDstFactor{{ "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
"float3(0,0,0);", // ZERO };
"float3(1,1,1);", // ONE static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactor{
"ocol0.rgb;", // SRCCLR "blend_dst.rgb = float3(0,0,0);", // ZERO
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR "blend_dst.rgb = float3(1,1,1);", // ONE
"ocol1.aaa;", // SRCALHA "blend_dst.rgb = ocol0.rgb;", // SRCCLR
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA "blend_dst.rgb = float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"initial_ocol0.aaa;", // DSTALPHA "blend_dst.rgb = ocol1.aaa;", // SRCALHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA "blend_dst.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
}}; "blend_dst.rgb = initial_ocol0.aaa;", // DSTALPHA
static constexpr std::array<std::string_view, 8> blendDstFactorAlpha{{ "blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
"0.0;", // ZERO };
"1.0;", // ONE static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactorAlpha{
"ocol0.a;", // SRCCLR "blend_dst.a = 0.0;", // ZERO
"1.0 - ocol0.a;", // INVSRCCLR "blend_dst.a = 1.0;", // ONE
"ocol1.a;", // SRCALPHA "blend_dst.a = ocol0.a;", // SRCCLR
"1.0 - ocol1.a;", // INVSRCALPHA "blend_dst.a = 1.0 - ocol0.a;", // INVSRCCLR
"initial_ocol0.a;", // DSTALPHA "blend_dst.a = ocol1.a;", // SRCALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA "blend_dst.a = 1.0 - ocol1.a;", // INVSRCALPHA
}}; "blend_dst.a = initial_ocol0.a;", // DSTALPHA
"blend_dst.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write(" if (blend_enable) {{\n" out.Write(" if (blend_enable) {{\n"
" float4 blend_src;\n" " float4 blend_src;\n");
" switch (blend_src_factor) {{\n"); WriteSwitch(out, api_type, "blend_src_factor", blendSrcFactor, 4, true);
for (size_t i = 0; i < blendSrcFactor.size(); i++) WriteSwitch(out, api_type, "blend_src_factor_alpha", blendSrcFactorAlpha, 4, true);
{
out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]);
}
out.Write(" }}\n" out.Write(" float4 blend_dst;\n");
" switch (blend_src_factor_alpha) {{\n"); WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true);
for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++) WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true);
{
out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]);
}
out.Write(" }}\n"
" float4 blend_dst;\n"
" switch (blend_dst_factor) {{\n");
for (size_t i = 0; i < blendDstFactor.size(); i++)
{
out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]);
}
out.Write(" }}\n"
" switch (blend_dst_factor_alpha) {{\n");
for (size_t i = 0; i < blendDstFactorAlpha.size(); i++)
{
out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]);
}
out.Write( out.Write(
" }}\n"
" float4 blend_result;\n" " float4 blend_result;\n"
" if (blend_subtract)\n" " if (blend_subtract)\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n" " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n"

View File

@ -91,7 +91,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = nat_offset; m_native_vtx_decl.posmtx.offset = nat_offset;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true; m_native_vtx_decl.posmtx.integer = true;
nat_offset += 4; nat_offset += 4;
} }
@ -110,7 +110,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.position.components = pos_elements; m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true; m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset; m_native_vtx_decl.position.offset = nat_offset;
m_native_vtx_decl.position.type = VAR_FLOAT; m_native_vtx_decl.position.type = ComponentFormat::Float;
m_native_vtx_decl.position.integer = false; m_native_vtx_decl.position.integer = false;
nat_offset += pos_elements * sizeof(float); nat_offset += pos_elements * sizeof(float);
@ -134,7 +134,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.normals[i].components = 3; m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true; m_native_vtx_decl.normals[i].enable = true;
m_native_vtx_decl.normals[i].offset = nat_offset; m_native_vtx_decl.normals[i].offset = nat_offset;
m_native_vtx_decl.normals[i].type = VAR_FLOAT; m_native_vtx_decl.normals[i].type = ComponentFormat::Float;
m_native_vtx_decl.normals[i].integer = false; m_native_vtx_decl.normals[i].integer = false;
nat_offset += 12; nat_offset += 12;
} }
@ -143,7 +143,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{ {
m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false; m_native_vtx_decl.colors[i].integer = false;
TPipelineFunction pFunc = TPipelineFunction pFunc =
@ -166,7 +166,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{ {
m_native_vtx_decl.texcoords[i].offset = nat_offset; m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false; m_native_vtx_decl.texcoords[i].integer = false;
const auto tc = m_VtxDesc.high.TexCoord[i].Value(); const auto tc = m_VtxDesc.high.TexCoord[i].Value();

View File

@ -6,6 +6,7 @@
#include <array> #include <array>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
@ -59,7 +60,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
WriteProtect(); WriteProtect();
} }
void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg) void VertexLoaderARM64::GetVertexAddr(CPArray array, VertexComponentFormat attribute, ARM64Reg reg)
{ {
if (IsIndexed(attribute)) if (IsIndexed(attribute))
{ {
@ -95,7 +96,7 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
REV16(scratch1_reg, scratch1_reg); REV16(scratch1_reg, scratch1_reg);
} }
if (array == ARRAY_POSITION) if (array == CPArray::Position)
{ {
EOR(scratch2_reg, scratch1_reg, EOR(scratch2_reg, scratch1_reg,
attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) : attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) :
@ -103,17 +104,18 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
m_skip_vertex = CBZ(scratch2_reg); m_skip_vertex = CBZ(scratch2_reg);
} }
LDR(IndexType::Unsigned, scratch2_reg, stride_reg, array * 4); LDR(IndexType::Unsigned, scratch2_reg, stride_reg, static_cast<u8>(array) * 4);
MUL(scratch1_reg, scratch1_reg, scratch2_reg); MUL(scratch1_reg, scratch1_reg, scratch2_reg);
LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8); LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg,
static_cast<u8>(array) * 8);
ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg)); ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
} }
else else
ADD(reg, src_reg, m_src_ofs); ADD(reg, src_reg, m_src_ofs);
} }
s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute, s32 VertexLoaderARM64::GetAddressImm(CPArray array, VertexComponentFormat attribute,
Arm64Gen::ARM64Reg reg, u32 align) Arm64Gen::ARM64Reg reg, u32 align)
{ {
if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1)))) if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1))))
@ -219,7 +221,7 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
native_format->components = count_out; native_format->components = count_out;
native_format->enable = true; native_format->enable = true;
native_format->offset = m_dst_ofs; native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT; native_format->type = ComponentFormat::Float;
native_format->integer = false; native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out; m_dst_ofs += sizeof(float) * count_out;
@ -403,8 +405,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
MOV(skipped_reg, ARM64Reg::WZR); MOV(skipped_reg, ARM64Reg::WZR);
MOV(saved_count, count_reg); MOV(saved_count, count_reg);
MOVP2R(stride_reg, g_main_cp_state.array_strides); MOVP2R(stride_reg, g_main_cp_state.array_strides.data());
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases); MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data());
if (need_scale) if (need_scale)
MOVP2R(scale_reg, scale_factors); MOVP2R(scale_reg, scale_factors);
@ -427,7 +429,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs; m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true; m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8); m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32); m_dst_ofs += sizeof(u32);
@ -448,8 +450,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes); int load_size = GetLoadSize(load_bytes);
load_size <<= 3; load_size <<= 3;
s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg), s32 offset = GetAddressImm(CPArray::Position, m_VtxDesc.low.Position,
load_size); EncodeRegTo64(scratch1_reg), load_size);
ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset); m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset);
} }
@ -470,7 +472,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_bytes = elem_size * 3; int load_bytes = elem_size * 3;
int load_size = GetLoadSize(load_bytes); int load_size = GetLoadSize(load_bytes);
offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg), offset = GetAddressImm(CPArray::Normal, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
load_size << 3); load_size << 3);
if (offset == -1) if (offset == -1)
@ -488,10 +490,10 @@ void VertexLoaderARM64::GenerateVertexLoader()
} }
} }
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{ {
m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false; m_native_vtx_decl.colors[i].integer = false;
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
@ -501,22 +503,22 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_VtxAttr.GetColorFormat(i) == ColorFormat::RGBA4444) m_VtxAttr.GetColorFormat(i) == ColorFormat::RGBA4444)
align = 2; align = 2;
s32 offset = GetAddressImm(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i], s32 offset = GetAddressImm(CPArray::Color0 + i, m_VtxDesc.low.Color[i],
EncodeRegTo64(scratch1_reg), align); EncodeRegTo64(scratch1_reg), align);
ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset); ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset);
m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs; m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false; m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4; m_dst_ofs += 4;
} }
} }
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{ {
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs; m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false; m_native_vtx_decl.texcoords[i].integer = false;
int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 1 : 2; int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 1 : 2;
@ -527,7 +529,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes); int load_size = GetLoadSize(load_bytes);
load_size <<= 3; load_size <<= 3;
s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i], s32 offset = GetAddressImm(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i],
EncodeRegTo64(scratch1_reg), load_size); EncodeRegTo64(scratch1_reg), load_size);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
@ -538,7 +540,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
{ {
m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true; m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false; m_native_vtx_decl.texcoords[i].integer = false;
LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]); LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]);

View File

@ -11,6 +11,7 @@ class DataReader;
enum class VertexComponentFormat; enum class VertexComponentFormat;
enum class ComponentFormat; enum class ComponentFormat;
enum class ColorFormat; enum class ColorFormat;
enum class CPArray : u8;
class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock
{ {
@ -25,8 +26,9 @@ private:
u32 m_dst_ofs = 0; u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex; Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit; Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg); void GetVertexAddr(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align); s32 GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg,
u32 align);
int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in, int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in,
int count_out, bool dequantize, u8 scaling_exponent, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset = -1); AttributeFormat* native_format, s32 offset = -1);

View File

@ -12,16 +12,14 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/IndexGenerator.h" #include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/NativeVertexFormat.h"
@ -48,14 +46,21 @@ static std::mutex s_vertex_loader_map_lock;
static VertexLoaderMap s_vertex_loader_map; static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far. // TODO - change into array of pointers. Keep a map of all seen so far.
u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
BitSet8 g_main_vat_dirty;
BitSet8 g_preprocess_vat_dirty;
bool g_bases_dirty; // Main only
u8 g_current_vat; // Main only
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
void Init() void Init()
{ {
MarkAllDirty(); MarkAllDirty();
for (auto& map_entry : g_main_cp_state.vertex_loaders) for (auto& map_entry : g_main_vertex_loaders)
map_entry = nullptr; map_entry = nullptr;
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders) for (auto& map_entry : g_preprocess_vertex_loaders)
map_entry = nullptr; map_entry = nullptr;
SETSTAT(g_stats.num_vertex_loaders, 0); SETSTAT(g_stats.num_vertex_loaders, 0);
} }
@ -70,7 +75,7 @@ void Clear()
void UpdateVertexArrayPointers() void UpdateVertexArrayPointers()
{ {
// Anything to update? // Anything to update?
if (!g_main_cp_state.bases_dirty) if (!g_bases_dirty)
return; return;
// Some games such as Burnout 2 can put invalid addresses into // Some games such as Burnout 2 can put invalid addresses into
@ -80,27 +85,28 @@ void UpdateVertexArrayPointers()
// 12 through 15 are used for loading data into xfmem. // 12 through 15 are used for loading data into xfmem.
// We also only update the array base if the vertex description states we are going to use it. // We also only update the array base if the vertex description states we are going to use it.
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position)) if (IsIndexed(g_main_cp_state.vtx_desc.low.Position))
cached_arraybases[ARRAY_POSITION] = cached_arraybases[CPArray::Position] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]); Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Position]);
if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal)) if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal))
cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]); cached_arraybases[CPArray::Normal] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Normal]);
for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++) for (u8 i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
{ {
if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i])) if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i]))
cached_arraybases[ARRAY_COLOR0 + i] = cached_arraybases[CPArray::Color0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR0 + i]); Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Color0 + i]);
} }
for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++) for (u8 i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
{ {
if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i])) if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i]))
cached_arraybases[ARRAY_TEXCOORD0 + i] = cached_arraybases[CPArray::TexCoord0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]); Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]);
} }
g_main_cp_state.bases_dirty = false; g_bases_dirty = false;
} }
namespace namespace
@ -115,8 +121,8 @@ struct entry
void MarkAllDirty() void MarkAllDirty()
{ {
g_main_cp_state.attr_dirty = BitSet32::AllTrue(8); g_main_vat_dirty = BitSet8::AllTrue(8);
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8); g_preprocess_vat_dirty = BitSet8::AllTrue(8);
} }
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl) NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
@ -140,7 +146,8 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
std::memset(&new_decl, 0, sizeof(new_decl)); std::memset(&new_decl, 0, sizeof(new_decl));
new_decl.stride = decl.stride; new_decl.stride = decl.stride;
auto MakeDummyAttribute = [](AttributeFormat& attr, VarType type, int components, bool integer) { auto MakeDummyAttribute = [](AttributeFormat& attr, ComponentFormat type, int components,
bool integer) {
attr.type = type; attr.type = type;
attr.components = components; attr.components = components;
attr.offset = 0; attr.offset = 0;
@ -158,32 +165,32 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
if (decl.position.enable) if (decl.position.enable)
CopyAttribute(new_decl.position, decl.position); CopyAttribute(new_decl.position, decl.position);
else else
MakeDummyAttribute(new_decl.position, VAR_FLOAT, 1, false); MakeDummyAttribute(new_decl.position, ComponentFormat::Float, 1, false);
for (size_t i = 0; i < std::size(new_decl.normals); i++) for (size_t i = 0; i < std::size(new_decl.normals); i++)
{ {
if (decl.normals[i].enable) if (decl.normals[i].enable)
CopyAttribute(new_decl.normals[i], decl.normals[i]); CopyAttribute(new_decl.normals[i], decl.normals[i]);
else else
MakeDummyAttribute(new_decl.normals[i], VAR_FLOAT, 1, false); MakeDummyAttribute(new_decl.normals[i], ComponentFormat::Float, 1, false);
} }
for (size_t i = 0; i < std::size(new_decl.colors); i++) for (size_t i = 0; i < std::size(new_decl.colors); i++)
{ {
if (decl.colors[i].enable) if (decl.colors[i].enable)
CopyAttribute(new_decl.colors[i], decl.colors[i]); CopyAttribute(new_decl.colors[i], decl.colors[i]);
else else
MakeDummyAttribute(new_decl.colors[i], VAR_UNSIGNED_BYTE, 4, false); MakeDummyAttribute(new_decl.colors[i], ComponentFormat::UByte, 4, false);
} }
for (size_t i = 0; i < std::size(new_decl.texcoords); i++) for (size_t i = 0; i < std::size(new_decl.texcoords); i++)
{ {
if (decl.texcoords[i].enable) if (decl.texcoords[i].enable)
CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]); CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]);
else else
MakeDummyAttribute(new_decl.texcoords[i], VAR_FLOAT, 1, false); MakeDummyAttribute(new_decl.texcoords[i], ComponentFormat::Float, 1, false);
} }
if (decl.posmtx.enable) if (decl.posmtx.enable)
CopyAttribute(new_decl.posmtx, decl.posmtx); CopyAttribute(new_decl.posmtx, decl.posmtx);
else else
MakeDummyAttribute(new_decl.posmtx, VAR_UNSIGNED_BYTE, 1, true); MakeDummyAttribute(new_decl.posmtx, ComponentFormat::UByte, 1, true);
return GetOrCreateMatchingFormat(new_decl); return GetOrCreateMatchingFormat(new_decl);
} }
@ -191,10 +198,12 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false) static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
{ {
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state; CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
state->last_id = vtx_attr_group; BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
// Select the loader table belonging to the same pipeline as attr_dirty above: the preprocess
// table when preprocessing, the main table otherwise. (The two operands were previously crossed,
// so main-thread loaders were stored in g_preprocess_vertex_loaders and vice versa.)
auto& vertex_loaders = preprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
g_current_vat = vtx_attr_group;
VertexLoaderBase* loader; VertexLoaderBase* loader;
if (state->attr_dirty[vtx_attr_group]) if (attr_dirty[vtx_attr_group])
{ {
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong // We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread // thread
@ -224,12 +233,12 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
native = g_renderer->CreateNativeVertexFormat(format); native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get(); loader->m_native_vertex_format = native.get();
} }
state->vertex_loaders[vtx_attr_group] = loader; vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty[vtx_attr_group] = false; attr_dirty[vtx_attr_group] = false;
} }
else else
{ {
loader = state->vertex_loaders[vtx_attr_group]; loader = vertex_loaders[vtx_attr_group];
} }
// Lookup pointers for any vertex arrays. // Lookup pointers for any vertex arrays.
@ -239,7 +248,8 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
return loader; return loader;
} }
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess) int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess)
{ {
if (!count) if (!count)
return 0; return 0;
@ -266,7 +276,8 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference // They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope. // slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5); bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
DataReader dst = g_vertex_manager->PrepareForAdditionalData( DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall); primitive, count, loader->m_native_vtx_decl.stride, cullall);
@ -287,147 +298,3 @@ NativeVertexFormat* GetCurrentVertexFormat()
} }
} // namespace VertexLoaderManager } // namespace VertexLoaderManager
// Applies a write of `value` to the CP register addressed by `sub_cmd`.
//
// `sub_cmd & CP_COMMAND_MASK` selects the register group handled by the switch below;
// `sub_cmd & CP_VAT_MASK` / `sub_cmd & CP_ARRAY_MASK` select the VAT or array slot within a group.
// Register contents are written to g_preprocess_cp_state when `is_preprocess` is true and to
// g_main_cp_state otherwise. The VertexShaderManager matrix-index notifications are only issued
// when `is_preprocess` is false.
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
  const bool is_main = !is_preprocess;
  CPState& cp = is_preprocess ? g_preprocess_cp_state : g_main_cp_state;

  // Slot indices used by the VAT and array register groups.
  const auto vat_slot = sub_cmd & CP_VAT_MASK;
  const auto array_slot = sub_cmd & CP_ARRAY_MASK;

  switch (sub_cmd & CP_COMMAND_MASK)
  {
  case UNKNOWN_00:
  case UNKNOWN_10:
  case UNKNOWN_20:
    // All titles using libogc or the official SDK issue 0x20 with value=0 on startup, so that
    // exact combination is not reported.
    if (sub_cmd != UNKNOWN_20 || value != 0)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
      DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
                    sub_cmd);
    }
    break;

  case MATINDEX_A:
    // Anything other than the exact register number is flagged, but still handled.
    if (sub_cmd != MATINDEX_A)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_A: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_A, sub_cmd);
    }
    if (is_main)
      VertexShaderManager::SetTexMatrixChangedA(value);
    break;

  case MATINDEX_B:
    if (sub_cmd != MATINDEX_B)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_B: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_B, sub_cmd);
    }
    if (is_main)
      VertexShaderManager::SetTexMatrixChangedB(value);
    break;

  case VCD_LO:
    if (sub_cmd != VCD_LO)  // Stricter than YAGCD
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_LO: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_LO, sub_cmd);
    }
    // A new vertex descriptor marks every VAT and the array bases dirty.
    cp.vtx_desc.low.Hex = value;
    cp.attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
    cp.bases_dirty = true;
    break;

  case VCD_HI:
    if (sub_cmd != VCD_HI)  // Stricter than YAGCD
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_HI: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_HI, sub_cmd);
    }
    cp.vtx_desc.high.Hex = value;
    cp.attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
    cp.bases_dirty = true;
    break;

  case CP_VAT_REG_A:
    // An out-of-range VAT number is reported, then the write proceeds on the masked slot.
    if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
    }
    cp.vtx_attr[vat_slot].g0.Hex = value;
    cp.attr_dirty[vat_slot] = true;
    break;

  case CP_VAT_REG_B:
    if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
    }
    cp.vtx_attr[vat_slot].g1.Hex = value;
    cp.attr_dirty[vat_slot] = true;
    break;

  case CP_VAT_REG_C:
    if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
    }
    cp.vtx_attr[vat_slot].g2.Hex = value;
    cp.attr_dirty[vat_slot] = true;
    break;

  // Pointers to vertex arrays in GC RAM
  case ARRAY_BASE:
    cp.array_bases[array_slot] = value & CommandProcessor::GetPhysicalAddressMask();
    cp.bases_dirty = true;
    break;

  case ARRAY_STRIDE:
    // Only the low byte of the written value is kept as the stride.
    cp.array_strides[array_slot] = value & 0xFF;
    break;

  default:
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
    WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
    break;
  }
}
// Copies the register values held in g_main_cp_state into `memory`, using the CP register
// numbers (MATINDEX_A, VCD_LO, CP_VAT_REG_A + n, ARRAY_BASE + n, ...) as indices.
// NOTE(review): `memory` must be large enough for the highest index written below; the required
// size is not visible from this function - confirm against callers.
void FillCPMemoryArray(u32* memory)
{
  const auto& cp = g_main_cp_state;

  // Matrix index and vertex descriptor registers.
  memory[MATINDEX_A] = cp.matrix_index_a.Hex;
  memory[MATINDEX_B] = cp.matrix_index_b.Hex;
  memory[VCD_LO] = cp.vtx_desc.low.Hex;
  memory[VCD_HI] = cp.vtx_desc.high.Hex;

  // The three register groups of each vertex attribute table.
  for (int vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    const auto& groups = cp.vtx_attr[vat];
    memory[CP_VAT_REG_A + vat] = groups.g0.Hex;
    memory[CP_VAT_REG_B + vat] = groups.g1.Hex;
    memory[CP_VAT_REG_C + vat] = groups.g2.Hex;
  }

  // Base address and stride of each vertex array.
  for (int slot = 0; slot < CP_NUM_ARRAYS; ++slot)
  {
    memory[ARRAY_BASE + slot] = cp.array_bases[slot];
    memory[ARRAY_STRIDE + slot] = cp.array_strides[slot];
  }
}

View File

@ -3,17 +3,24 @@
#pragma once #pragma once
#include <array>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
class DataReader; class DataReader;
class NativeVertexFormat; class NativeVertexFormat;
struct PortableVertexDeclaration; struct PortableVertexDeclaration;
namespace OpcodeDecoder
{
enum class Primitive : u8;
};
namespace VertexLoaderManager namespace VertexLoaderManager
{ {
using NativeVertexFormatMap = using NativeVertexFormatMap =
@ -35,12 +42,13 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl); NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
// Returns -1 if buf_size is insufficient, else the amount of bytes consumed // Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess); int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess);
NativeVertexFormat* GetCurrentVertexFormat(); NativeVertexFormat* GetCurrentVertexFormat();
// Resolved pointers to array bases. Used by vertex loaders. // Resolved pointers to array bases. Used by vertex loaders.
extern u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; extern Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
void UpdateVertexArrayPointers(); void UpdateVertexArrayPointers();
// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite). // Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
@ -50,4 +58,11 @@ extern u32 position_matrix_index[4];
// VB_HAS_X. Bitmask telling what vertex components are present. // VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components; extern u32 g_current_components;
extern BitSet8 g_main_vat_dirty;
extern BitSet8 g_preprocess_vat_dirty;
extern bool g_bases_dirty; // Main only
extern u8 g_current_vat; // Main only
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
} // namespace VertexLoaderManager } // namespace VertexLoaderManager

View File

@ -15,6 +15,7 @@
#include "Common/JitRegister.h" #include "Common/JitRegister.h"
#include "Common/x64ABI.h" #include "Common/x64ABI.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
@ -54,7 +55,7 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att)
JitRegister::Register(region, GetCodePtr(), name.c_str()); JitRegister::Register(region, GetCodePtr(), name.c_str());
} }
OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute) OpArg VertexLoaderX64::GetVertexAddr(CPArray array, VertexComponentFormat attribute)
{ {
OpArg data = MDisp(src_reg, m_src_ofs); OpArg data = MDisp(src_reg, m_src_ofs);
if (IsIndexed(attribute)) if (IsIndexed(attribute))
@ -62,7 +63,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute)
int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16; int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16;
LoadAndSwap(bits, scratch1, data); LoadAndSwap(bits, scratch1, data);
m_src_ofs += bits / 8; m_src_ofs += bits / 8;
if (array == ARRAY_POSITION) if (array == CPArray::Position)
{ {
CMP(bits, R(scratch1), Imm8(-1)); CMP(bits, R(scratch1), Imm8(-1));
m_skip_vertex = J_CC(CC_E, true); m_skip_vertex = J_CC(CC_E, true);
@ -121,7 +122,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
native_format->components = count_out; native_format->components = count_out;
native_format->enable = true; native_format->enable = true;
native_format->offset = m_dst_ofs; native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT; native_format->type = ComponentFormat::Float;
native_format->integer = false; native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out; m_dst_ofs += sizeof(float) * count_out;
@ -420,7 +421,7 @@ void VertexLoaderX64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs; m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true; m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8); m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32); m_dst_ofs += sizeof(u32);
@ -433,7 +434,7 @@ void VertexLoaderX64::GenerateVertexLoader()
texmatidx_ofs[i] = m_src_ofs++; texmatidx_ofs[i] = m_src_ofs++;
} }
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position); OpArg data = GetVertexAddr(CPArray::Position, m_VtxDesc.low.Position);
int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 2 : 3; int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 2 : 3;
ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position); m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position);
@ -448,7 +449,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{ {
if (!i || m_VtxAttr.g0.NormalIndex3) if (!i || m_VtxAttr.g0.NormalIndex3)
{ {
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal); data = GetVertexAddr(CPArray::Normal, m_VtxDesc.low.Normal);
int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat); int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat);
data.AddMemOffset(i * elem_size * 3); data.AddMemOffset(i * elem_size * 3);
} }
@ -457,27 +458,27 @@ void VertexLoaderX64::GenerateVertexLoader()
} }
} }
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{ {
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{ {
data = GetVertexAddr(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i]); data = GetVertexAddr(CPArray::Color0 + i, m_VtxDesc.low.Color[i]);
ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i)); ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i));
m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs; m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false; m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4; m_dst_ofs += 4;
} }
} }
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{ {
int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1; int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1;
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{ {
data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]); data = GetVertexAddr(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i]);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.g0.ByteDequant, m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.g0.ByteDequant,
@ -487,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{ {
m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true; m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false; m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i])); MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)

View File

@ -10,6 +10,7 @@
enum class VertexComponentFormat; enum class VertexComponentFormat;
enum class ComponentFormat; enum class ComponentFormat;
enum class ColorFormat; enum class ColorFormat;
enum class CPArray : u8;
class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
{ {
@ -23,7 +24,7 @@ private:
u32 m_src_ofs = 0; u32 m_src_ofs = 0;
u32 m_dst_ofs = 0; u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex; Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute); Gen::OpArg GetVertexAddr(CPArray array, VertexComponentFormat attribute);
int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format, int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent, int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format); AttributeFormat* native_format);

View File

@ -6,6 +6,7 @@
#include <cstring> #include <cstring>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Common/Swap.h" #include "Common/Swap.h"
@ -78,8 +79,8 @@ void Color_ReadIndex_16b_565(VertexLoader* loader)
{ {
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const u8* const address = const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
u16 value; u16 value;
std::memcpy(&value, address, sizeof(u16)); std::memcpy(&value, address, sizeof(u16));
@ -91,8 +92,8 @@ template <typename I>
void Color_ReadIndex_24b_888(VertexLoader* loader) void Color_ReadIndex_24b_888(VertexLoader* loader)
{ {
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address)); SetCol(loader, Read24(address));
} }
@ -100,18 +101,18 @@ template <typename I>
void Color_ReadIndex_32b_888x(VertexLoader* loader) void Color_ReadIndex_32b_888x(VertexLoader* loader)
{ {
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address)); SetCol(loader, Read24(address));
} }
template <typename I> template <typename I>
void Color_ReadIndex_16b_4444(VertexLoader* loader) void Color_ReadIndex_16b_4444(VertexLoader* loader)
{ {
auto const index = DataRead<I>(); const auto index = DataRead<I>();
const u8* const address = const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
u16 value; u16 value;
std::memcpy(&value, address, sizeof(u16)); std::memcpy(&value, address, sizeof(u16));
@ -123,9 +124,9 @@ template <typename I>
void Color_ReadIndex_24b_6666(VertexLoader* loader) void Color_ReadIndex_24b_6666(VertexLoader* loader)
{ {
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const u8* data = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]) - 1; (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
const u32 val = Common::swap32(data); const u32 val = Common::swap24(data);
SetCol6666(loader, val); SetCol6666(loader, val);
} }
@ -133,8 +134,8 @@ template <typename I>
void Color_ReadIndex_32b_8888(VertexLoader* loader) void Color_ReadIndex_32b_8888(VertexLoader* loader)
{ {
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read32(address)); SetCol(loader, Read32(address));
} }
@ -166,7 +167,7 @@ void Color_ReadDirect_16b_4444(VertexLoader* loader)
void Color_ReadDirect_24b_6666(VertexLoader* loader) void Color_ReadDirect_24b_6666(VertexLoader* loader)
{ {
SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); SetCol6666(loader, Common::swap24(DataGetPosition()));
DataSkip(3); DataSkip(3);
} }
@ -175,21 +176,40 @@ void Color_ReadDirect_32b_8888(VertexLoader* loader)
SetCol(loader, DataReadU32Unswapped()); SetCol(loader, DataReadU32Unswapped());
} }
constexpr TPipelineFunction s_table_read_color[4][6] = { using Common::EnumMap;
{nullptr, nullptr, nullptr, nullptr, nullptr, nullptr},
{Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x, // These functions are to work around a "too many initializer values" error with nested brackets
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}, // C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
{Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>, // (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>}, constexpr EnumMap<TPipelineFunction, ColorFormat::RGBA8888>
{Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>, f(EnumMap<TPipelineFunction, ColorFormat::RGBA8888> in)
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>, Color_ReadIndex_32b_8888<u16>}, {
return in;
}
constexpr EnumMap<u32, ColorFormat::RGBA8888> g(EnumMap<u32, ColorFormat::RGBA8888> in)
{
return in;
}
template <typename T>
using Table = EnumMap<EnumMap<T, ColorFormat::RGBA8888>, VertexComponentFormat::Index16>;
constexpr Table<TPipelineFunction> s_table_read_color = {
f({nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}),
f({Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x,
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}),
f({Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>,
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>}),
f({Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>,
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>,
Color_ReadIndex_32b_8888<u16>}),
}; };
constexpr u32 s_table_read_color_vertex_size[4][6] = { constexpr Table<u32> s_table_read_color_vertex_size = {
{0, 0, 0, 0, 0, 0}, g({0u, 0u, 0u, 0u, 0u, 0u}),
{2, 3, 4, 2, 3, 4}, g({2u, 3u, 4u, 2u, 3u, 4u}),
{1, 1, 1, 1, 1, 1}, g({1u, 1u, 1u, 1u, 1u, 1u}),
{2, 2, 2, 2, 2, 2}, g({2u, 2u, 2u, 2u, 2u, 2u}),
}; };
} // Anonymous namespace } // Anonymous namespace
@ -200,7 +220,7 @@ u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format)
PanicAlertFmt("Invalid color format {}", format); PanicAlertFmt("Invalid color format {}", format);
return 0; return 0;
} }
return s_table_read_color_vertex_size[u32(type)][u32(format)]; return s_table_read_color_vertex_size[type][format];
} }
TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format) TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format)
@ -210,5 +230,5 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co
PanicAlertFmt("Invalid color format {}", format); PanicAlertFmt("Invalid color format {}", format);
return nullptr; return nullptr;
} }
return s_table_read_color[u32(type)][u32(format)]; return s_table_read_color[type][format];
} }

View File

@ -7,6 +7,7 @@
#include <type_traits> #include <type_traits>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader.h"
@ -58,7 +59,7 @@ struct Normal_Direct
{ {
static void function([[maybe_unused]] VertexLoader* loader) static void function([[maybe_unused]] VertexLoader* loader)
{ {
auto const source = reinterpret_cast<const T*>(DataGetPosition()); const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source); ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>(); DataSkip<N * 3 * sizeof(T)>();
} }
@ -71,10 +72,10 @@ void Normal_Index_Offset()
{ {
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!"); static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");
auto const index = DataRead<I>(); const auto index = DataRead<I>();
auto const data = reinterpret_cast<const T*>( const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] + VertexLoaderManager::cached_arraybases[CPArray::Normal] +
(index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); (index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data); ReadIndirect<T, N * 3>(data);
} }
@ -98,39 +99,6 @@ struct Normal_Index_Indices3
static constexpr u32 size = sizeof(I) * 3; static constexpr u32 size = sizeof(I) * 3;
}; };
enum NormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};
enum NormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};
enum NormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};
enum NormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};
struct Set struct Set
{ {
template <typename T> template <typename T>
@ -145,83 +113,88 @@ struct Set
TPipelineFunction function; TPipelineFunction function;
}; };
using Formats = std::array<Set, NUM_NRM_FORMAT>; using Common::EnumMap;
using Elements = std::array<Formats, NUM_NRM_ELEMENTS>; using Formats = EnumMap<Set, ComponentFormat::Float>;
using Indices = std::array<Elements, NUM_NRM_INDICES>; using Elements = EnumMap<Formats, NormalComponentCount::NBT>;
using Types = std::array<Indices, NUM_NRM_TYPE>; using Indices = std::array<Elements, 2>;
using Types = EnumMap<Indices, VertexComponentFormat::Index16>;
constexpr Types InitializeTable() constexpr Types InitializeTable()
{ {
Types table{}; Types table{};
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>(); using VCF = VertexComponentFormat;
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>(); using NCC = NormalComponentCount;
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>(); using FMT = ComponentFormat;
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
// Same as above table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>(); table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>(); table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>(); table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>(); table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>(); table[VCF::Direct][false][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>(); table[VCF::Direct][false][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>(); table[VCF::Direct][false][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>(); table[VCF::Direct][false][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>(); table[VCF::Direct][false][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>(); // Same as above, since there are no indices
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>(); table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>(); table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>(); table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>(); table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>(); table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>(); table[VCF::Direct][true][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>(); table[VCF::Direct][true][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>(); table[VCF::Direct][true][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>(); table[VCF::Direct][true][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();
// Same as above for NRM_NBT table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>(); table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>(); table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>(); table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>(); table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>(); table[VCF::Index8][false][NCC::NBT][FMT::UByte] = Normal_Index<u8, u8, 3>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>(); table[VCF::Index8][false][NCC::NBT][FMT::Byte] = Normal_Index<u8, s8, 3>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>(); table[VCF::Index8][false][NCC::NBT][FMT::UShort] = Normal_Index<u8, u16, 3>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>(); table[VCF::Index8][false][NCC::NBT][FMT::Short] = Normal_Index<u8, s16, 3>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>(); table[VCF::Index8][false][NCC::NBT][FMT::Float] = Normal_Index<u8, float, 3>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>(); // Same for NormalComponentCount::N; differs for NBT
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>(); table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>(); table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>(); table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>(); table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>(); table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>(); table[VCF::Index8][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u8, u8>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>(); table[VCF::Index8][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u8, s8>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>(); table[VCF::Index8][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u8, u16>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>(); table[VCF::Index8][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u8, s16>();
table[VCF::Index8][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u8, float>();
// Same as above for NRM_NBT table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>(); table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>(); table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>(); table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>(); table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>(); table[VCF::Index16][false][NCC::NBT][FMT::UByte] = Normal_Index<u16, u8, 3>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>(); table[VCF::Index16][false][NCC::NBT][FMT::Byte] = Normal_Index<u16, s8, 3>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>(); table[VCF::Index16][false][NCC::NBT][FMT::UShort] = Normal_Index<u16, u16, 3>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>(); table[VCF::Index16][false][NCC::NBT][FMT::Short] = Normal_Index<u16, s16, 3>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>(); table[VCF::Index16][false][NCC::NBT][FMT::Float] = Normal_Index<u16, float, 3>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
// Same for NormalComponentCount::N; differs for NBT
table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u16, u8>();
table[VCF::Index16][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u16, s8>();
table[VCF::Index16][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u16, u16>();
table[VCF::Index16][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u16, s16>();
table[VCF::Index16][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u16, float>();
return table; return table;
} }
@ -230,14 +203,14 @@ constexpr Types s_table = InitializeTable();
} // Anonymous namespace } // Anonymous namespace
u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format, u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3) NormalComponentCount elements, bool index3)
{ {
return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size; return s_table[type][index3][elements][format].gc_size;
} }
TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type, TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type,
ComponentFormat format, ComponentFormat format,
NormalComponentCount elements, u32 index3) NormalComponentCount elements, bool index3)
{ {
return s_table[u32(type)][index3][u32(elements)][u32(format)].function; return s_table[type][index3][elements][format].function;
} }

View File

@ -14,8 +14,8 @@ class VertexLoader_Normal
{ {
public: public:
static u32 GetSize(VertexComponentFormat type, ComponentFormat format, static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3); NormalComponentCount elements, bool index3);
static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format, static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3); NormalComponentCount elements, bool index3);
}; };

View File

@ -7,6 +7,7 @@
#include <type_traits> #include <type_traits>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Swap.h" #include "Common/Swap.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
@ -59,8 +60,8 @@ void Pos_ReadIndex(VertexLoader* loader)
const auto index = DataRead<I>(); const auto index = DataRead<I>();
loader->m_vertexSkip = index == std::numeric_limits<I>::max(); loader->m_vertexSkip = index == std::numeric_limits<I>::max();
const auto data = const auto data =
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] + reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[CPArray::Position] +
(index * g_main_cp_state.array_strides[ARRAY_POSITION])); (index * g_main_cp_state.array_strides[CPArray::Position]));
const auto scale = loader->m_posScale; const auto scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
@ -76,138 +77,109 @@ void Pos_ReadIndex(VertexLoader* loader)
LOG_VTX(); LOG_VTX();
} }
constexpr TPipelineFunction s_table_read_position[4][8][2] = { using Common::EnumMap;
{
{ // These functions are to work around a "too many initializer values" error with nested brackets
nullptr, // C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
nullptr, // (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
}, constexpr EnumMap<TPipelineFunction, CoordComponentCount::XYZ> e(TPipelineFunction xy,
{ TPipelineFunction xyz)
nullptr, {
nullptr, return {xy, xyz};
}, }
{ constexpr EnumMap<u32, CoordComponentCount::XYZ> e(u32 xy, u32 xyz)
nullptr, {
nullptr, return {xy, xyz};
}, }
{
nullptr, constexpr EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float>
nullptr, f(EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
}, {
{ return in;
nullptr, }
nullptr,
}, constexpr EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float>
}, g(EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
{ {
{ return in;
Pos_ReadDirect<u8, 2>, }
Pos_ReadDirect<u8, 3>,
}, template <typename T>
{ using Table = EnumMap<EnumMap<EnumMap<T, CoordComponentCount::XYZ>, ComponentFormat::Float>,
Pos_ReadDirect<s8, 2>, VertexComponentFormat::Index16>;
Pos_ReadDirect<s8, 3>,
}, constexpr Table<TPipelineFunction> s_table_read_position = {
{ f({
Pos_ReadDirect<u16, 2>, e(nullptr, nullptr),
Pos_ReadDirect<u16, 3>, e(nullptr, nullptr),
}, e(nullptr, nullptr),
{ e(nullptr, nullptr),
Pos_ReadDirect<s16, 2>, e(nullptr, nullptr),
Pos_ReadDirect<s16, 3>, }),
}, f({
{ e(Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>),
Pos_ReadDirect<float, 2>, e(Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>),
Pos_ReadDirect<float, 3>, e(Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>),
}, e(Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>),
}, e(Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>),
{ }),
{ f({
Pos_ReadIndex<u8, u8, 2>, e(Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>),
Pos_ReadIndex<u8, u8, 3>, e(Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>),
}, e(Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>),
{ e(Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>),
Pos_ReadIndex<u8, s8, 2>, e(Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>),
Pos_ReadIndex<u8, s8, 3>, }),
}, f({
{ e(Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>),
Pos_ReadIndex<u8, u16, 2>, e(Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>),
Pos_ReadIndex<u8, u16, 3>, e(Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>),
}, e(Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>),
{ e(Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>),
Pos_ReadIndex<u8, s16, 2>, }),
Pos_ReadIndex<u8, s16, 3>,
},
{
Pos_ReadIndex<u8, float, 2>,
Pos_ReadIndex<u8, float, 3>,
},
},
{
{
Pos_ReadIndex<u16, u8, 2>,
Pos_ReadIndex<u16, u8, 3>,
},
{
Pos_ReadIndex<u16, s8, 2>,
Pos_ReadIndex<u16, s8, 3>,
},
{
Pos_ReadIndex<u16, u16, 2>,
Pos_ReadIndex<u16, u16, 3>,
},
{
Pos_ReadIndex<u16, s16, 2>,
Pos_ReadIndex<u16, s16, 3>,
},
{
Pos_ReadIndex<u16, float, 2>,
Pos_ReadIndex<u16, float, 3>,
},
},
}; };
constexpr u32 s_table_read_position_vertex_size[4][8][2] = { constexpr Table<u32> s_table_read_position_vertex_size = {
{ g({
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
}, }),
{ g({
{2, 3}, e(2, 3),
{2, 3}, e(2, 3),
{4, 6}, e(4, 6),
{4, 6}, e(4, 6),
{8, 12}, e(8, 12),
}, }),
{ g({
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
}, }),
{ g({
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
}, }),
}; };
} // Anonymous namespace } // Anonymous namespace
u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format, u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements) CoordComponentCount elements)
{ {
return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)]; return s_table_read_position_vertex_size[type][format][elements];
} }
TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type, TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type,
ComponentFormat format, ComponentFormat format,
CoordComponentCount elements) CoordComponentCount elements)
{ {
return s_table_read_position[u32(type)][u32(format)][u32(elements)]; return s_table_read_position[type][format][elements];
} }

View File

@ -55,8 +55,8 @@ void TexCoord_ReadIndex(VertexLoader* loader)
const auto index = DataRead<I>(); const auto index = DataRead<I>();
const auto data = reinterpret_cast<const T*>( const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); (index * g_main_cp_state.array_strides[CPArray::TexCoord0 + loader->m_tcIndex]));
const auto scale = loader->m_tcScale[loader->m_tcIndex]; const auto scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
@ -67,140 +67,110 @@ void TexCoord_ReadIndex(VertexLoader* loader)
++loader->m_tcIndex; ++loader->m_tcIndex;
} }
constexpr TPipelineFunction s_table_read_tex_coord[4][8][2] = { using Common::EnumMap;
{ // These functions are to work around a "too many initializer values" error with nested brackets
{ // C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
nullptr, // (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
nullptr, constexpr EnumMap<TPipelineFunction, TexComponentCount::ST> e(TPipelineFunction s,
}, TPipelineFunction st)
{ {
nullptr, return {s, st};
nullptr, }
}, constexpr EnumMap<u32, TexComponentCount::ST> e(u32 s, u32 st)
{ {
nullptr, return {s, st};
nullptr, }
},
{ constexpr EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float>
nullptr, f(EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float> in)
nullptr, {
}, return in;
{ }
nullptr,
nullptr, constexpr EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float>
}, g(EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float> in)
}, {
{ return in;
{ }
TexCoord_ReadDirect<u8, 1>,
TexCoord_ReadDirect<u8, 2>, template <typename T>
}, using Table = EnumMap<EnumMap<EnumMap<T, TexComponentCount::ST>, ComponentFormat::Float>,
{ VertexComponentFormat::Index16>;
TexCoord_ReadDirect<s8, 1>,
TexCoord_ReadDirect<s8, 2>, constexpr Table<TPipelineFunction> s_table_read_tex_coord = {
}, f({
{ e(nullptr, nullptr),
TexCoord_ReadDirect<u16, 1>, e(nullptr, nullptr),
TexCoord_ReadDirect<u16, 2>, e(nullptr, nullptr),
}, e(nullptr, nullptr),
{ e(nullptr, nullptr),
TexCoord_ReadDirect<s16, 1>, }),
TexCoord_ReadDirect<s16, 2>, f({
}, e(TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>),
{ e(TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>),
TexCoord_ReadDirect<float, 1>, e(TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>),
TexCoord_ReadDirect<float, 2>, e(TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>),
}, e(TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>),
}, }),
{ f({
{ e(TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>),
TexCoord_ReadIndex<u8, u8, 1>, e(TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>),
TexCoord_ReadIndex<u8, u8, 2>, e(TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>),
}, e(TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>),
{ e(TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>),
TexCoord_ReadIndex<u8, s8, 1>, }),
TexCoord_ReadIndex<u8, s8, 2>, f({
}, e(TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>),
{ e(TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>),
TexCoord_ReadIndex<u8, u16, 1>, e(TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>),
TexCoord_ReadIndex<u8, u16, 2>, e(TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>),
}, e(TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>),
{ }),
TexCoord_ReadIndex<u8, s16, 1>,
TexCoord_ReadIndex<u8, s16, 2>,
},
{
TexCoord_ReadIndex<u8, float, 1>,
TexCoord_ReadIndex<u8, float, 2>,
},
},
{
{
TexCoord_ReadIndex<u16, u8, 1>,
TexCoord_ReadIndex<u16, u8, 2>,
},
{
TexCoord_ReadIndex<u16, s8, 1>,
TexCoord_ReadIndex<u16, s8, 2>,
},
{
TexCoord_ReadIndex<u16, u16, 1>,
TexCoord_ReadIndex<u16, u16, 2>,
},
{
TexCoord_ReadIndex<u16, s16, 1>,
TexCoord_ReadIndex<u16, s16, 2>,
},
{
TexCoord_ReadIndex<u16, float, 1>,
TexCoord_ReadIndex<u16, float, 2>,
},
},
}; };
constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = { constexpr Table<u32> s_table_read_tex_coord_vertex_size = {
{ g({
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
{0, 0}, e(0u, 0u),
}, }),
{ g({
{1, 2}, e(1, 2),
{1, 2}, e(1, 2),
{2, 4}, e(2, 4),
{2, 4}, e(2, 4),
{4, 8}, e(4, 8),
}, }),
{ g({
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
{1, 1}, e(1, 1),
}, }),
{ g({
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
{2, 2}, e(2, 2),
}, }),
}; };
} // Anonymous namespace } // Anonymous namespace
u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format, u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format,
TexComponentCount elements) TexComponentCount elements)
{ {
return s_table_read_tex_coord_vertex_size[u32(type)][u32(format)][u32(elements)]; return s_table_read_tex_coord_vertex_size[type][format][elements];
} }
TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type, TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type,
ComponentFormat format, ComponentFormat format,
TexComponentCount elements) TexComponentCount elements)
{ {
return s_table_read_tex_coord[u32(type)][u32(format)][u32(elements)]; return s_table_read_tex_coord[type][format][elements];
} }
TPipelineFunction VertexLoader_TextCoord::GetDummyFunction() TPipelineFunction VertexLoader_TextCoord::GetDummyFunction()

View File

@ -10,6 +10,7 @@
#include "Common/BitSet.h" #include "Common/BitSet.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
@ -38,8 +39,10 @@
std::unique_ptr<VertexManagerBase> g_vertex_manager; std::unique_ptr<VertexManagerBase> g_vertex_manager;
using OpcodeDecoder::Primitive;
// GX primitive -> RenderState primitive, no primitive restart // GX primitive -> RenderState primitive, no primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx{{ constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx{
PrimitiveType::Triangles, // GX_DRAW_QUADS PrimitiveType::Triangles, // GX_DRAW_QUADS
PrimitiveType::Triangles, // GX_DRAW_QUADS_2 PrimitiveType::Triangles, // GX_DRAW_QUADS_2
PrimitiveType::Triangles, // GX_DRAW_TRIANGLES PrimitiveType::Triangles, // GX_DRAW_TRIANGLES
@ -48,10 +51,10 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx{{
PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS PrimitiveType::Points, // GX_DRAW_POINTS
}}; };
// GX primitive -> RenderState primitive, using primitive restart // GX primitive -> RenderState primitive, using primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{ constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx_pr{
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS PrimitiveType::TriangleStrip, // GX_DRAW_QUADS
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2 PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2
PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES
@ -60,7 +63,7 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{
PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS PrimitiveType::Points, // GX_DRAW_POINTS
}}; };
// Due to the BT.601 standard which the GameCube is based on being a compromise // Due to the BT.601 standard which the GameCube is based on being a compromise
// between PAL and NTSC, neither standard gets square pixels. They are each off // between PAL and NTSC, neither standard gets square pixels. They are each off
@ -107,13 +110,13 @@ u32 VertexManagerBase::GetRemainingSize() const
return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer); return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
} }
void VertexManagerBase::AddIndices(int primitive, u32 num_vertices) void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{ {
m_index_generator.AddIndices(primitive, num_vertices); m_index_generator.AddIndices(primitive, num_vertices);
} }
DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
bool cullall) u32 count, u32 stride, bool cullall)
{ {
// Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently. // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently.
g_framebuffer_manager->FlushEFBPokes(); g_framebuffer_manager->FlushEFBPokes();
@ -185,7 +188,7 @@ void VertexManagerBase::FlushData(u32 count, u32 stride)
m_cur_buffer_pointer += count * stride; m_cur_buffer_pointer += count * stride;
} }
u32 VertexManagerBase::GetRemainingIndices(int primitive) const u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
{ {
const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen(); const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();
@ -193,22 +196,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{ {
switch (primitive) switch (primitive)
{ {
case OpcodeDecoder::GX_DRAW_QUADS: case Primitive::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2: case Primitive::GX_DRAW_QUADS_2:
return index_len / 5 * 4; return index_len / 5 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES: case Primitive::GX_DRAW_TRIANGLES:
return index_len / 4 * 3; return index_len / 4 * 3;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1; return index_len / 1 - 1;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1; return index_len / 6 * 4 + 1;
case OpcodeDecoder::GX_DRAW_LINES: case Primitive::GX_DRAW_LINES:
return index_len; return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP: case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1; return index_len / 2 + 1;
case OpcodeDecoder::GX_DRAW_POINTS: case Primitive::GX_DRAW_POINTS:
return index_len; return index_len;
default: default:
@ -219,22 +222,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{ {
switch (primitive) switch (primitive)
{ {
case OpcodeDecoder::GX_DRAW_QUADS: case Primitive::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2: case Primitive::GX_DRAW_QUADS_2:
return index_len / 6 * 4; return index_len / 6 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES: case Primitive::GX_DRAW_TRIANGLES:
return index_len; return index_len;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2; return index_len / 3 + 2;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2; return index_len / 3 + 2;
case OpcodeDecoder::GX_DRAW_LINES: case Primitive::GX_DRAW_LINES:
return index_len; return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP: case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1; return index_len / 2 + 1;
case OpcodeDecoder::GX_DRAW_POINTS: case Primitive::GX_DRAW_POINTS:
return index_len; return index_len;
default: default:

View File

@ -35,6 +35,11 @@ enum TexelBufferFormat : u32
NUM_TEXEL_BUFFER_FORMATS NUM_TEXEL_BUFFER_FORMATS
}; };
namespace OpcodeDecoder
{
enum class Primitive : u8;
};
class VertexManagerBase class VertexManagerBase
{ {
private: private:
@ -93,8 +98,9 @@ public:
virtual bool Initialize(); virtual bool Initialize();
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(int primitive, u32 num_vertices); void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride,
bool cullall);
void FlushData(u32 count, u32 stride); void FlushData(u32 count, u32 stride);
void Flush(); void Flush();
@ -163,7 +169,7 @@ protected:
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex); virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex);
u32 GetRemainingSize() const; u32 GetRemainingSize() const;
u32 GetRemainingIndices(int primitive) const; u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
void CalculateZSlope(NativeVertexFormat* format); void CalculateZSlope(NativeVertexFormat* format);
void LoadTextures(); void LoadTextures();

View File

@ -23,6 +23,7 @@
#include "VideoCommon/FreeLookCamera.h" #include "VideoCommon/FreeLookCamera.h"
#include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h" #include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"

View File

@ -13,8 +13,6 @@
#include "Common/EnumFormatter.h" #include "Common/EnumFormatter.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
class DataReader;
constexpr size_t NUM_XF_COLOR_CHANNELS = 2; constexpr size_t NUM_XF_COLOR_CHANNELS = 2;
// Lighting // Lighting
@ -454,10 +452,10 @@ struct XFMemory
u32 unk9[8]; // 0x1048 - 0x104f u32 unk9[8]; // 0x1048 - 0x104f
PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057 PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057
}; };
static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058); static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END);
extern XFMemory xfmem; extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address, DataReader src); void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data);
void LoadIndexedXF(u32 val, int array); void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size);
void PreprocessIndexedXF(u32 val, int refarray); void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size);

View File

@ -12,7 +12,6 @@
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h" #include "VideoCommon/Fifo.h"
#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/PixelShaderManager.h"
@ -26,16 +25,10 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
} }
static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) static void XFRegWritten(u32 address, u32 value)
{ {
u32 address = baseAddress; if (address >= XFMEM_REGISTERS_START && address < XFMEM_REGISTERS_END)
u32 dataIndex = 0;
while (transferSize > 0 && address < XFMEM_REGISTERS_END)
{ {
u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
u32 nextAddress = address + 1;
switch (address) switch (address)
{ {
case XFMEM_ERROR: case XFMEM_ERROR:
@ -44,12 +37,12 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_STATE1: // internal state 1 case XFMEM_STATE1: // internal state 1
case XFMEM_CLOCK: case XFMEM_CLOCK:
case XFMEM_SETGPMETRIC: case XFMEM_SETGPMETRIC:
nextAddress = 0x1007; // Not implemented
break; break;
case XFMEM_CLIPDISABLE: case XFMEM_CLIPDISABLE:
{ {
ClipDisable setting{.hex = newValue}; ClipDisable setting{.hex = value};
if (setting.disable_clipping_detection) if (setting.disable_clipping_detection)
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0); DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0);
if (setting.disable_trivial_rejection) if (setting.disable_trivial_rejection)
@ -63,7 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
break; break;
case XFMEM_SETNUMCHAN: case XFMEM_SETNUMCHAN:
if (xfmem.numChan.numColorChans != (newValue & 3)) if (xfmem.numChan.numColorChans != (value & 3))
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged(); VertexShaderManager::SetLightingConfigChanged();
break; break;
@ -72,7 +65,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_AMBCOLOR: case XFMEM_SETCHAN1_AMBCOLOR:
{ {
u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR; u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR;
if (xfmem.ambColor[chan] != newValue) if (xfmem.ambColor[chan] != value)
{ {
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan); VertexShaderManager::SetMaterialColorChanged(chan);
@ -84,7 +77,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_MATCOLOR: case XFMEM_SETCHAN1_MATCOLOR:
{ {
u8 chan = address - XFMEM_SETCHAN0_MATCOLOR; u8 chan = address - XFMEM_SETCHAN0_MATCOLOR;
if (xfmem.matColor[chan] != newValue) if (xfmem.matColor[chan] != value)
{ {
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan + 2); VertexShaderManager::SetMaterialColorChanged(chan + 2);
@ -96,22 +89,22 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_COLOR: case XFMEM_SETCHAN1_COLOR:
case XFMEM_SETCHAN0_ALPHA: // Channel Alpha case XFMEM_SETCHAN0_ALPHA: // Channel Alpha
case XFMEM_SETCHAN1_ALPHA: case XFMEM_SETCHAN1_ALPHA:
if (((u32*)&xfmem)[address] != (newValue & 0x7fff)) if (((u32*)&xfmem)[address] != (value & 0x7fff))
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged(); VertexShaderManager::SetLightingConfigChanged();
break; break;
case XFMEM_DUALTEX: case XFMEM_DUALTEX:
if (xfmem.dualTexTrans.enabled != bool(newValue & 1)) if (xfmem.dualTexTrans.enabled != bool(value & 1))
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(-1); VertexShaderManager::SetTexMatrixInfoChanged(-1);
break; break;
case XFMEM_SETMATRIXINDA: case XFMEM_SETMATRIXINDA:
VertexShaderManager::SetTexMatrixChangedA(newValue); VertexShaderManager::SetTexMatrixChangedA(value);
break; break;
case XFMEM_SETMATRIXINDB: case XFMEM_SETMATRIXINDB:
VertexShaderManager::SetTexMatrixChangedB(newValue); VertexShaderManager::SetTexMatrixChangedB(value);
break; break;
case XFMEM_SETVIEWPORT: case XFMEM_SETVIEWPORT:
@ -124,8 +117,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
VertexShaderManager::SetViewportChanged(); VertexShaderManager::SetViewportChanged();
PixelShaderManager::SetViewportChanged(); PixelShaderManager::SetViewportChanged();
GeometryShaderManager::SetViewportChanged(); GeometryShaderManager::SetViewportChanged();
nextAddress = XFMEM_SETVIEWPORT + 6;
break; break;
case XFMEM_SETPROJECTION: case XFMEM_SETPROJECTION:
@ -138,12 +129,10 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetProjectionChanged(); VertexShaderManager::SetProjectionChanged();
GeometryShaderManager::SetProjectionChanged(); GeometryShaderManager::SetProjectionChanged();
nextAddress = XFMEM_SETPROJECTION + 7;
break; break;
case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens
if (xfmem.numTexGen.numTexGens != (newValue & 15)) if (xfmem.numTexGen.numTexGens != (value & 15))
g_vertex_manager->Flush(); g_vertex_manager->Flush();
break; break;
@ -157,8 +146,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETTEXMTXINFO + 7: case XFMEM_SETTEXMTXINFO + 7:
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO);
nextAddress = XFMEM_SETTEXMTXINFO + 8;
break; break;
case XFMEM_SETPOSTMTXINFO: case XFMEM_SETPOSTMTXINFO:
@ -171,8 +158,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETPOSTMTXINFO + 7: case XFMEM_SETPOSTMTXINFO + 7:
g_vertex_manager->Flush(); g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO);
nextAddress = XFMEM_SETPOSTMTXINFO + 8;
break; break;
// -------------- // --------------
@ -189,7 +174,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case 0x104e: case 0x104e:
case 0x104f: case 0x104f:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, newValue); DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, value);
break; break;
case 0x1013: case 0x1013:
@ -200,83 +185,69 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
default: default:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, newValue); WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, value);
break; break;
} }
int transferred = nextAddress - address;
address = nextAddress;
transferSize -= transferred;
dataIndex += transferred;
} }
} }
void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data)
{ {
// do not allow writes past registers if (base_address > XFMEM_REGISTERS_END)
if (baseAddress + transferSize > XFMEM_REGISTERS_END)
{ {
WARN_LOG_FMT(VIDEO, "XF load exceeds address space: {:x} {} bytes", baseAddress, transferSize); WARN_LOG_FMT(VIDEO, "XF load base address past end of address space: {:x} {} bytes",
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); base_address, transfer_size);
return;
}
if (baseAddress >= XFMEM_REGISTERS_END) u32 end_address = base_address + transfer_size; // exclusive
transferSize = 0;
else // do not allow writes past registers
transferSize = XFMEM_REGISTERS_END - baseAddress; if (end_address > XFMEM_REGISTERS_END)
{
WARN_LOG_FMT(VIDEO, "XF load ends past end of address space: {:x} {} bytes", base_address,
transfer_size);
end_address = XFMEM_REGISTERS_END;
} }
// write to XF mem // write to XF mem
if (baseAddress < XFMEM_REGISTERS_START && transferSize > 0) if (base_address < XFMEM_REGISTERS_START)
{ {
u32 end = baseAddress + transferSize; const u32 xf_mem_base = base_address;
u32 xf_mem_transfer_size = transfer_size;
u32 xfMemBase = baseAddress; if (end_address > XFMEM_REGISTERS_START)
u32 xfMemTransferSize = transferSize;
if (end >= XFMEM_REGISTERS_START)
{ {
xfMemTransferSize = XFMEM_REGISTERS_START - baseAddress; xf_mem_transfer_size = XFMEM_REGISTERS_START - base_address;
base_address = XFMEM_REGISTERS_START;
baseAddress = XFMEM_REGISTERS_START;
transferSize = end - XFMEM_REGISTERS_START;
}
else
{
transferSize = 0;
} }
XFMemWritten(xfMemTransferSize, xfMemBase); XFMemWritten(xf_mem_transfer_size, xf_mem_base);
for (u32 i = 0; i < xfMemTransferSize; i++) for (u32 i = 0; i < xf_mem_transfer_size; i++)
{ {
((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>(); ((u32*)&xfmem)[xf_mem_base + i] = Common::swap32(data);
data += 4;
} }
} }
// write to XF regs // write to XF regs
if (transferSize > 0) if (base_address >= XFMEM_REGISTERS_START)
{ {
XFRegWritten(transferSize, baseAddress, src); for (u32 address = base_address; address < end_address; address++)
for (u32 i = 0; i < transferSize; i++)
{ {
((u32*)&xfmem)[baseAddress + i] = src.Read<u32>(); const u32 value = Common::swap32(data);
XFRegWritten(address, value);
((u32*)&xfmem)[address] = value;
data += 4;
} }
} }
} }
constexpr std::tuple<u32, u32, u32> ExtractIndexedXF(u32 val)
{
const u32 index = val >> 16;
const u32 address = val & 0xFFF; // check mask
const u32 size = ((val >> 12) & 0xF) + 1;
return {index, address, size};
}
// TODO - verify that it is correct. Seems to work, though. // TODO - verify that it is correct. Seems to work, though.
void LoadIndexedXF(u32 val, int refarray) void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{ {
const auto [index, address, size] = ExtractIndexedXF(val);
// load stuff from array to address in xf mem // load stuff from array to address in xf mem
u32* currData = (u32*)(&xfmem) + address; u32* currData = (u32*)(&xfmem) + address;
@ -287,8 +258,8 @@ void LoadIndexedXF(u32 val, int refarray)
} }
else else
{ {
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] +
g_main_cp_state.array_strides[refarray] * index); g_main_cp_state.array_strides[array] * index);
} }
bool changed = false; bool changed = false;
for (u32 i = 0; i < size; ++i) for (u32 i = 0; i < size; ++i)
@ -307,12 +278,10 @@ void LoadIndexedXF(u32 val, int refarray)
} }
} }
void PreprocessIndexedXF(u32 val, int refarray) void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{ {
const auto [index, address, size] = ExtractIndexedXF(val); const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] +
g_preprocess_cp_state.array_strides[array] * index);
const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] +
g_preprocess_cp_state.array_strides[refarray] * index);
const size_t buf_size = size * sizeof(u32); const size_t buf_size = size * sizeof(u32);
Fifo::PushFifoAuxBuffer(new_data, buf_size); Fifo::PushFifoAuxBuffer(new_data, buf_size);
@ -581,13 +550,9 @@ std::string GetXFMemDescription(u32 address, u32 value)
} }
} }
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data) std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
const u8* data)
{ {
const u32 cmd = Common::swap32(data);
data += 4;
u32 base_address = cmd & 0xFFFF;
const u32 transfer_size = ((cmd >> 16) & 15) + 1;
if (base_address > XFMEM_REGISTERS_END) if (base_address > XFMEM_REGISTERS_END)
{ {
return std::make_pair("Invalid XF Transfer", "Base address past end of address space"); return std::make_pair("Invalid XF Transfer", "Base address past end of address space");
@ -655,10 +620,9 @@ std::pair<std::string, std::string> GetXFTransferInfo(const u8* data)
return std::make_pair(fmt::to_string(name), fmt::to_string(desc)); return std::make_pair(fmt::to_string(name), fmt::to_string(desc));
} }
std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value) std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size)
{ {
const auto [index, address, size] = ExtractIndexedXF(value);
const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size, const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size,
address, array, index); address, array, index);
fmt::memory_buffer written; fmt::memory_buffer written;

View File

@ -11,5 +11,7 @@
std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value); std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value);
std::string GetXFMemName(u32 address); std::string GetXFMemName(u32 address);
std::string GetXFMemDescription(u32 address, u32 value); std::string GetXFMemDescription(u32 address, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data); std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value); const u8* data);
std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size);

View File

@ -46,6 +46,12 @@ TEST(EnumUtil, Enum1)
EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */"); EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(3)), "0x3u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(3)), "0x3u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(4)), "0x4u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(4)), "0x4u /* Invalid */");
EXPECT_EQ(fmt::format("{:n}", Enum1::A), "A");
EXPECT_EQ(fmt::format("{:n}", Enum1::B), "B");
EXPECT_EQ(fmt::format("{:n}", Enum1::C), "C");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(3)), "Invalid (3)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(4)), "Invalid (4)");
} }
TEST(EnumUtil, Enum2) TEST(EnumUtil, Enum2)
@ -63,4 +69,11 @@ TEST(EnumUtil, Enum2)
EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */"); EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(4)), "0x4u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(4)), "0x4u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(-1)), "0xffffffffu /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(-1)), "0xffffffffu /* Invalid */");
EXPECT_EQ(fmt::format("{:n}", Enum2::D), "D");
EXPECT_EQ(fmt::format("{:n}", Enum2::E), "E");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(2)), "Invalid (2)");
EXPECT_EQ(fmt::format("{:n}", Enum2::F), "F");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(4)), "Invalid (4)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(-1)), "Invalid (-1)");
} }

View File

@ -174,8 +174,8 @@ TEST_P(VertexLoaderParamTest, PositionAll)
Input<u8>(i); Input<u8>(i);
else else
Input<u16>(i); Input<u16>(i);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size; g_main_cp_state.array_strides[CPArray::Position] = elem_count * elem_size;
} }
CreateAndCheckSizes(input_size, elem_count * sizeof(float)); CreateAndCheckSizes(input_size, elem_count * sizeof(float));
for (float value : values) for (float value : values)
@ -243,8 +243,8 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float)); CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float));
Input<u16>(1); Input<u16>(1);
Input<u16>(0); Input<u16>(0);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;) g_main_cp_state.array_strides[CPArray::Position] = sizeof(float); // ;)
Input(1.f); Input(1.f);
Input(2.f); Input(2.f);
Input(3.f); Input(3.f);
@ -357,8 +357,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++) for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++)
{ {
VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer(); VertexLoaderManager::cached_arraybases[static_cast<CPArray>(i)] = m_src.GetPointer();
g_main_cp_state.array_strides[i] = 129; g_main_cp_state.array_strides[static_cast<CPArray>(i)] = 129;
} }
// This test is only done 100x in a row since it's ~20x slower using the // This test is only done 100x in a row since it's ~20x slower using the