Merge pull request #9718 from Pokechu22/better-fifo-analyzer-part-3

Fifo analyzer improvements, part 3
This commit is contained in:
JMC47 2021-12-20 14:27:14 -05:00 committed by GitHub
commit 32fed91b0d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
66 changed files with 2757 additions and 2695 deletions

View File

@ -55,9 +55,9 @@ public:
constexpr auto parse(fmt::format_parse_context& ctx)
{
auto it = ctx.begin(), end = ctx.end();
// 'u' for user display, 's' for shader generation
if (it != end && (*it == 'u' || *it == 's'))
formatting_for_shader = (*it++ == 's');
// 'u' for user display, 's' for shader generation, 'n' for name only
if (it != end && (*it == 'u' || *it == 's' || *it == 'n'))
format_type = *it++;
return it;
}
@ -68,19 +68,24 @@ public:
const auto value_u = static_cast<std::make_unsigned_t<T>>(value_s); // Always unsigned
const bool has_name = m_names.InBounds(e) && m_names[e] != nullptr;
if (!formatting_for_shader)
switch (format_type)
{
default:
case 'u':
if (has_name)
return fmt::format_to(ctx.out(), "{} ({})", m_names[e], value_s);
else
return fmt::format_to(ctx.out(), "Invalid ({})", value_s);
}
else
{
case 's':
if (has_name)
return fmt::format_to(ctx.out(), "{:#x}u /* {} */", value_u, m_names[e]);
else
return fmt::format_to(ctx.out(), "{:#x}u /* Invalid */", value_u);
case 'n':
if (has_name)
return fmt::format_to(ctx.out(), "{}", m_names[e]);
else
return fmt::format_to(ctx.out(), "Invalid ({})", value_s);
}
}
@ -92,5 +97,5 @@ protected:
private:
const array_type m_names;
bool formatting_for_shader = false;
char format_type = 'u';
};

View File

@ -103,16 +103,10 @@ add_library(core
DSP/LabelMap.h
DSPEmulator.cpp
DSPEmulator.h
FifoPlayer/FifoAnalyzer.cpp
FifoPlayer/FifoAnalyzer.h
FifoPlayer/FifoDataFile.cpp
FifoPlayer/FifoDataFile.h
FifoPlayer/FifoPlaybackAnalyzer.cpp
FifoPlayer/FifoPlaybackAnalyzer.h
FifoPlayer/FifoPlayer.cpp
FifoPlayer/FifoPlayer.h
FifoPlayer/FifoRecordAnalyzer.cpp
FifoPlayer/FifoRecordAnalyzer.h
FifoPlayer/FifoRecorder.cpp
FifoPlayer/FifoRecorder.h
FreeLookConfig.cpp

View File

@ -1,294 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include <numeric>
#include "Common/Assert.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
namespace FifoAnalyzer
{
namespace
{
// Returns the byte at the stream cursor and moves the cursor one byte forward.
u8 ReadFifo8(const u8*& data)
{
  return *data++;
}
// Returns the 16-bit value at the stream cursor (byte-swapped through Common::swap16) and
// moves the cursor two bytes forward.
u16 ReadFifo16(const u8*& data)
{
  const u8* const field = data;
  data += 2;
  return Common::swap16(field);
}
// Returns the 32-bit value at the stream cursor (byte-swapped through Common::swap32) and
// moves the cursor four bytes forward.
u32 ReadFifo32(const u8*& data)
{
  const u8* const field = data;
  data += 4;
  return Common::swap32(field);
}
// Computes the per-vertex size of every vertex element for the given vertex attribute table.
//
// vatIndex selects the entry of cpMem.vtxAttr to use; cpMem.vtxDesc supplies the per-component
// presence/format. The returned array has 21 slots:
//   [0]      position matrix index
//   [1..8]   texture matrix indices
//   [9]      position
//   [10]     normal
//   [11..12] colors
//   [13..20] texture coordinates
// AnalyzeCommand sums these values and uses the total as a byte count per vertex.
std::array<int, 21> CalculateVertexElementSizes(int vatIndex, const CPMemory& cpMem)
{
const TVtxDesc& vtxDesc = cpMem.vtxDesc;
const VAT& vtxAttr = cpMem.vtxAttr[vatIndex];
// Colors
const std::array<ColorFormat, 2> colComp{
vtxAttr.g0.Color0Comp,
vtxAttr.g0.Color1Comp,
};
// Texture coordinate element counts and formats are spread over VAT groups g0, g1 and g2;
// gather them into arrays so they can be indexed by texture coordinate number.
const std::array<TexComponentCount, 8> tcElements{
vtxAttr.g0.Tex0CoordElements, vtxAttr.g1.Tex1CoordElements, vtxAttr.g1.Tex2CoordElements,
vtxAttr.g1.Tex3CoordElements, vtxAttr.g1.Tex4CoordElements, vtxAttr.g2.Tex5CoordElements,
vtxAttr.g2.Tex6CoordElements, vtxAttr.g2.Tex7CoordElements,
};
const std::array<ComponentFormat, 8> tcFormat{
vtxAttr.g0.Tex0CoordFormat, vtxAttr.g1.Tex1CoordFormat, vtxAttr.g1.Tex2CoordFormat,
vtxAttr.g1.Tex3CoordFormat, vtxAttr.g1.Tex4CoordFormat, vtxAttr.g2.Tex5CoordFormat,
vtxAttr.g2.Tex6CoordFormat, vtxAttr.g2.Tex7CoordFormat,
};
std::array<int, 21> sizes{};
// Add position and texture matrix indices
// NOTE(review): the raw descriptor field value is stored as the size; presumably it is 1 when
// the index byte is present and 0 otherwise - confirm against TVtxDesc.
sizes[0] = vtxDesc.low.PosMatIdx;
for (size_t i = 0; i < vtxDesc.low.TexMatIdx.Size(); ++i)
{
sizes[i + 1] = vtxDesc.low.TexMatIdx[i];
}
// Position
sizes[9] = VertexLoader_Position::GetSize(vtxDesc.low.Position, vtxAttr.g0.PosFormat,
vtxAttr.g0.PosElements);
// Normals
// The normal size is only queried when the component is present; otherwise it is zero.
if (vtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
sizes[10] = VertexLoader_Normal::GetSize(vtxDesc.low.Normal, vtxAttr.g0.NormalFormat,
vtxAttr.g0.NormalElements, vtxAttr.g0.NormalIndex3);
}
else
{
sizes[10] = 0;
}
// Colors
for (size_t i = 0; i < vtxDesc.low.Color.Size(); i++)
{
int size = 0;
switch (vtxDesc.low.Color[i])
{
case VertexComponentFormat::NotPresent:
break;
case VertexComponentFormat::Direct:
// Direct colors are stored inline; their size depends on the color format.
switch (colComp[i])
{
case ColorFormat::RGB565:
size = 2;
break;
case ColorFormat::RGB888:
size = 3;
break;
case ColorFormat::RGB888x:
size = 4;
break;
case ColorFormat::RGBA4444:
size = 2;
break;
case ColorFormat::RGBA6666:
size = 3;
break;
case ColorFormat::RGBA8888:
size = 4;
break;
default:
ASSERT(0);
break;
}
break;
// Indexed colors only store the index itself in the vertex.
case VertexComponentFormat::Index8:
size = 1;
break;
case VertexComponentFormat::Index16:
size = 2;
break;
}
sizes[11 + i] = size;
}
// Texture coordinates
for (size_t i = 0; i < tcFormat.size(); i++)
{
sizes[13 + i] =
VertexLoader_TextCoord::GetSize(vtxDesc.high.TexCoord[i], tcFormat[i], tcElements[i]);
}
return sizes;
}
} // Anonymous namespace
// Updated by AnalyzeCommand: set to true by a primitive (draw) opcode and cleared by CP, XF, BP
// and indexed-load opcodes. NOP-style opcodes leave it unchanged.
bool s_DrawingObject;
// CP register state shared by the analyzers; written through LoadCPReg.
FifoAnalyzer::CPMemory s_CpMem;
// Decodes the single GX command that starts at data and returns its size in bytes.
//
// Side effects: updates s_DrawingObject and, for CP register loads, s_CpMem. When mode is
// DecodeMode::Record, indexed XF loads and primitive vertex data are additionally forwarded to
// FifoRecordAnalyzer.
//
// Returns 0 (after raising a panic alert) if the opcode is not recognized.
u32 AnalyzeCommand(const u8* data, DecodeMode mode)
{
const u8* dataStart = data;
int cmd = ReadFifo8(data);
switch (cmd)
{
// Single-byte opcodes: nothing follows the opcode byte.
case OpcodeDecoder::GX_NOP:
case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS:
case OpcodeDecoder::GX_CMD_INVL_VC:
break;
case OpcodeDecoder::GX_LOAD_CP_REG:
{
s_DrawingObject = false;
// One byte of register address followed by a 32-bit value.
u32 cmd2 = ReadFifo8(data);
u32 value = ReadFifo32(data);
LoadCPReg(cmd2, value, s_CpMem);
break;
}
case OpcodeDecoder::GX_LOAD_XF_REG:
{
s_DrawingObject = false;
u32 cmd2 = ReadFifo32(data);
// Bits 16-19 of the header hold (number of 32-bit words - 1); skip over the payload.
u8 streamSize = ((cmd2 >> 16) & 15) + 1;
data += streamSize * 4;
break;
}
case OpcodeDecoder::GX_LOAD_INDX_A:
case OpcodeDecoder::GX_LOAD_INDX_B:
case OpcodeDecoder::GX_LOAD_INDX_C:
case OpcodeDecoder::GX_LOAD_INDX_D:
{
s_DrawingObject = false;
// The four indexed-load opcodes are spaced 8 apart and map to arrays 0xc..0xf.
int array = 0xc + (cmd - OpcodeDecoder::GX_LOAD_INDX_A) / 8;
u32 value = ReadFifo32(data);
if (mode == DecodeMode::Record)
FifoRecordAnalyzer::ProcessLoadIndexedXf(value, array);
break;
}
case OpcodeDecoder::GX_CMD_CALL_DL:
// The recorder should have expanded display lists into the fifo stream and skipped the call to
// start them
// That is done to make it easier to track where memory is updated
ASSERT(false);
// Still skip the 8 bytes of operands so the returned size stays consistent.
data += 8;
break;
case OpcodeDecoder::GX_LOAD_BP_REG:
{
s_DrawingObject = false;
// The 32-bit operand is consumed but not interpreted here.
ReadFifo32(data);
break;
}
default:
// Opcodes with the top bit set are primitive (draw) commands.
if (cmd & 0x80)
{
s_DrawingObject = true;
const std::array<int, 21> sizes =
CalculateVertexElementSizes(cmd & OpcodeDecoder::GX_VAT_MASK, s_CpMem);
// Determine offset of each element that might be a vertex array
// The first 9 elements are never vertex arrays so we just accumulate their sizes.
int offset = std::accumulate(sizes.begin(), sizes.begin() + 9, 0u);
std::array<int, NUM_VERTEX_COMPONENT_ARRAYS> offsets;
for (size_t i = 0; i < offsets.size(); ++i)
{
offsets[i] = offset;
offset += sizes[i + 9];
}
// After the loop, offset is the sum of all element sizes, i.e. the size of one vertex.
const int vertexSize = offset;
const int numVertices = ReadFifo16(data);
if (mode == DecodeMode::Record && numVertices > 0)
{
for (size_t i = 0; i < offsets.size(); ++i)
{
FifoRecordAnalyzer::WriteVertexArray(static_cast<int>(i), data + offsets[i], vertexSize,
numVertices);
}
}
data += numVertices * vertexSize;
}
else
{
PanicAlertFmt("FifoPlayer: Unknown Opcode ({:#x}).\n", cmd);
return 0;
}
break;
}
return (u32)(data - dataStart);
}
// Applies a single CP register write to the tracked CP state.
//
// subCmd is the CP register address: its CP_COMMAND_MASK bits select the register group and the
// remaining bits select the VAT entry or array slot within that group. value is the 32-bit
// register value. Writes to register groups not listed here are ignored.
void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem)
{
  const u32 group = subCmd & CP_COMMAND_MASK;

  if (group == VCD_LO)
  {
    cpMem.vtxDesc.low.Hex = value;
  }
  else if (group == VCD_HI)
  {
    cpMem.vtxDesc.high.Hex = value;
  }
  else if (group == CP_VAT_REG_A)
  {
    ASSERT(subCmd - CP_VAT_REG_A < CP_NUM_VAT_REG);
    cpMem.vtxAttr[subCmd & CP_VAT_MASK].g0.Hex = value;
  }
  else if (group == CP_VAT_REG_B)
  {
    ASSERT(subCmd - CP_VAT_REG_B < CP_NUM_VAT_REG);
    cpMem.vtxAttr[subCmd & CP_VAT_MASK].g1.Hex = value;
  }
  else if (group == CP_VAT_REG_C)
  {
    ASSERT(subCmd - CP_VAT_REG_C < CP_NUM_VAT_REG);
    cpMem.vtxAttr[subCmd & CP_VAT_MASK].g2.Hex = value;
  }
  else if (group == ARRAY_BASE)
  {
    cpMem.arrayBases[subCmd & CP_ARRAY_MASK] = value;
  }
  else if (group == ARRAY_STRIDE)
  {
    // Only the low byte of a stride register is kept.
    cpMem.arrayStrides[subCmd & CP_ARRAY_MASK] = value & 0xFF;
  }
}
} // namespace FifoAnalyzer

View File

@ -1,33 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
// Shared command analysis used by the FIFO recorder and the FIFO player.
namespace FifoAnalyzer
{
// Selects which consumer AnalyzeCommand is decoding for.
enum class DecodeMode
{
Record,
Playback,
};
// Decodes one command starting at data and returns its size in bytes.
u32 AnalyzeCommand(const u8* data, DecodeMode mode);
// The subset of CP register state tracked by the analyzer.
struct CPMemory
{
// Vertex descriptor (VCD_LO / VCD_HI).
TVtxDesc vtxDesc;
// Vertex attribute tables, one per VAT register index.
std::array<VAT, CP_NUM_VAT_REG> vtxAttr;
// Base and stride of each CP array; both start zero-initialized.
std::array<u32, CP_NUM_ARRAYS> arrayBases{};
std::array<u32, CP_NUM_ARRAYS> arrayStrides{};
};
// Applies the CP register write (subCmd, value) to cpMem.
void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem);
// Analyzer state shared across translation units; the definitions are not in this header.
extern bool s_DrawingObject;
extern FifoAnalyzer::CPMemory s_CpMem;
} // namespace FifoAnalyzer

View File

@ -1,111 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
using namespace FifoAnalyzer;
// For debugging
#define LOG_FIFO_CMDS 0
// Debug record describing one decoded command; only filled in when LOG_FIFO_CMDS is enabled.
struct CmdData
{
// Size of the command in bytes, as returned by AnalyzeCommand.
u32 size;
// Offset of the command within the frame's fifo data.
u32 offset;
// Pointer to the first byte of the command.
const u8* ptr;
};
// Walks every frame of file and records, per frame, where each drawn object starts and ends in
// the fifo data, the CP state at each object start, and the frame's memory updates.
//
// frameInfo is cleared and resized to one entry per frame. If a command cannot be decoded, the
// object lists of the frame being processed are cleared and the function returns early, so
// later frames are left default-constructed.
void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
std::vector<AnalyzedFrameInfo>& frameInfo)
{
// Seed the shared CP state from the register dump stored in the file.
u32* cpMem = file->GetCPMem();
FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem);
FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem);
for (u32 i = 0; i < CP_NUM_VAT_REG; ++i)
{
FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem);
FifoAnalyzer::LoadCPReg(CP_VAT_REG_B + i, cpMem[CP_VAT_REG_B + i], s_CpMem);
FifoAnalyzer::LoadCPReg(CP_VAT_REG_C + i, cpMem[CP_VAT_REG_C + i], s_CpMem);
}
frameInfo.clear();
frameInfo.resize(file->GetFrameCount());
for (u32 frameIdx = 0; frameIdx < file->GetFrameCount(); ++frameIdx)
{
const FifoFrameInfo& frame = file->GetFrame(frameIdx);
AnalyzedFrameInfo& analyzed = frameInfo[frameIdx];
s_DrawingObject = false;
u32 cmdStart = 0;
u32 nextMemUpdate = 0;
#if LOG_FIFO_CMDS
// Debugging
std::vector<CmdData> prevCmds;
#endif
while (cmdStart < frame.fifoData.size())
{
// Add memory updates that have occurred before this point in the frame
while (nextMemUpdate < frame.memoryUpdates.size() &&
frame.memoryUpdates[nextMemUpdate].fifoPosition <= cmdStart)
{
analyzed.memoryUpdates.push_back(frame.memoryUpdates[nextMemUpdate]);
++nextMemUpdate;
}
// Remember the drawing state before decoding so a transition can be detected afterwards.
const bool wasDrawing = s_DrawingObject;
const u32 cmdSize =
FifoAnalyzer::AnalyzeCommand(&frame.fifoData[cmdStart], DecodeMode::Playback);
#if LOG_FIFO_CMDS
CmdData cmdData;
cmdData.offset = cmdStart;
cmdData.ptr = &frame.fifoData[cmdStart];
cmdData.size = cmdSize;
prevCmds.push_back(cmdData);
#endif
// Check for error
if (cmdSize == 0)
{
// Clean up frame analysis
analyzed.objectStarts.clear();
analyzed.objectCPStates.clear();
analyzed.objectEnds.clear();
return;
}
// A change of s_DrawingObject marks an object boundary at the current command.
if (wasDrawing != s_DrawingObject)
{
if (s_DrawingObject)
{
analyzed.objectStarts.push_back(cmdStart);
analyzed.objectCPStates.push_back(s_CpMem);
}
else
{
analyzed.objectEnds.push_back(cmdStart);
}
}
cmdStart += cmdSize;
}
// If the frame ended while still drawing, close the last object at the end of the data.
if (analyzed.objectEnds.size() < analyzed.objectStarts.size())
analyzed.objectEnds.push_back(cmdStart);
ASSERT(analyzed.objectStarts.size() == analyzed.objectCPStates.size());
ASSERT(analyzed.objectStarts.size() == analyzed.objectEnds.size());
}
}

View File

@ -1,25 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include <vector>
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
// Result of analyzing one recorded frame. objectStarts, objectCPStates and objectEnds are
// parallel vectors with one entry per object; offsets are positions in the frame's fifo data.
struct AnalyzedFrameInfo
{
// Start of the primitives for the object (after previous update commands)
std::vector<u32> objectStarts;
// CP state captured at the start of each object
std::vector<FifoAnalyzer::CPMemory> objectCPStates;
// End of the primitives for the object
std::vector<u32> objectEnds;
// Memory updates of the frame, in the order they were encountered during analysis
std::vector<MemoryUpdate> memoryUpdates;
};
namespace FifoPlaybackAnalyzer
{
// Analyzes every frame of file and fills frameInfo with one entry per frame.
void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frameInfo);
} // namespace FifoPlaybackAnalyzer

View File

@ -4,6 +4,7 @@
#include "Core/FifoPlayer/FifoPlayer.h"
#include <algorithm>
#include <cstring>
#include <mutex>
#include "Common/Assert.h"
@ -12,7 +13,6 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h"
@ -31,6 +31,136 @@
// TODO: Move texMem somewhere else so this isn't an issue.
#include "VideoCommon/TextureDecoder.h"
namespace
{
// OpcodeDecoder callback that classifies each decoded command so AnalyzeFrames can split a
// frame into command, primitive-data and EFB-copy parts.
class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback
{
public:
// Splits every frame of file into parts; frame_info receives one entry per frame.
static void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frame_info);
// cpmem is used to construct the initial CP state.
explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {}
// XF loads are ignored.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP writes are applied to the tracked CP state.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value));
// Indexed loads are ignored.
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
// Display list calls are ignored.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {}
OPCODE_CALLBACK(void OnNop(u32 count));
// Unknown opcodes are ignored.
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size));
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Classification of the most recent command; recomputed by every OnCommand call.
bool m_start_of_primitives = false;
bool m_end_of_primitives = false;
bool m_efb_copy = false;
// Internal state, copied to above in OnCommand
bool m_was_primitive = false;
bool m_is_primitive = false;
bool m_is_copy = false;
bool m_is_nop = false;
// CP state updated by OnCP.
CPState m_cpmem;
};
// Decodes every frame of file and splits it into consecutive parts (commands, primitive data,
// EFB copies), stored in frame_info with one entry per frame.
//
// A single analyzer instance is reused for all frames, so its CP state carries over from one
// frame to the next.
void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
std::vector<AnalyzedFrameInfo>& frame_info)
{
FifoPlaybackAnalyzer analyzer(file->GetCPMem());
frame_info.clear();
frame_info.resize(file->GetFrameCount());
for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++)
{
const FifoFrameInfo& frame = file->GetFrame(frame_no);
AnalyzedFrameInfo& analyzed = frame_info[frame_no];
// offset is the start of the command being decoded; part_start is where the current part began.
u32 offset = 0;
u32 part_start = 0;
// Snapshot of the CP state taken when the current run of primitive data started.
CPState cpmem;
while (offset < frame.fifoData.size())
{
const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset],
u32(frame.fifoData.size()) - offset, analyzer);
// The start/end checks below run before offset is advanced, so those parts end just before
// the command that was decoded above.
if (analyzer.m_start_of_primitives)
{
// Start of primitive data for an object
analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem);
part_start = offset;
// Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after
// primitive data, and the first opcode might update cpmem
std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState));
}
if (analyzer.m_end_of_primitives)
{
// End of primitive data for an object, and thus end of the object
analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem);
part_start = offset;
}
offset += cmd_size;
if (analyzer.m_efb_copy)
{
// We increase the offset beforehand, so that the trigger EFB copy command is included.
analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem);
part_start = offset;
}
}
// The frame should end with an EFB copy, so part_start should have been updated to the end.
ASSERT(part_start == frame.fifoData.size());
ASSERT(offset == frame.fifoData.size());
}
}
// Flags the command currently being decoded as an EFB copy when it writes the EFB copy trigger
// register; every other BP write is ignored.
void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value)
{
  if (command != BPMEM_TRIGGER_EFB_COPY)
    return;

  m_is_copy = true;
}
// Marks the command currently being decoded as a primitive (draw) command. The vertex data
// itself is not inspected; OnCommand consumes and resets the flag.
void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data)
{
m_is_primitive = true;
}
// Marks the command currently being decoded as a NOP so that OnCommand skips the
// primitive/copy classification for it.
void FifoPlaybackAnalyzer::OnNop(u32 count)
{
m_is_nop = true;
}
// Turns the per-command flags collected by the other callbacks into the public classification
// flags, then resets the per-command flags for the next command.
//
// NOPs never change the classification and do not count as leaving a run of primitives. For any
// other command, at most one flag is set, in this priority: start of primitives, end of
// primitives, EFB copy.
void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size)
{
  const bool significant = !m_is_nop;
  const bool entering = m_is_primitive && !m_was_primitive;
  const bool leaving = m_was_primitive && !m_is_primitive;

  m_start_of_primitives = significant && entering;
  m_end_of_primitives = significant && !entering && leaving;
  m_efb_copy = significant && !entering && !leaving && m_is_copy;

  if (significant)
    m_was_primitive = m_is_primitive;

  // Per-command flags only describe the command that was just decoded.
  m_is_primitive = false;
  m_is_copy = false;
  m_is_nop = false;
}
} // namespace
bool IsPlayingBackFifologWithBrokenEFBCopies = false;
FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay}
@ -191,7 +321,7 @@ u32 FifoPlayer::GetMaxObjectCount() const
u32 result = 0;
for (auto& frame : m_FrameInfo)
{
const u32 count = static_cast<u32>(frame.objectStarts.size());
const u32 count = frame.part_type_counts[FramePartType::PrimitiveData];
if (count > result)
result = count;
}
@ -202,7 +332,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const
{
if (frame < m_FrameInfo.size())
{
return static_cast<u32>(m_FrameInfo[frame].objectStarts.size());
return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData];
}
return 0;
@ -262,55 +392,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
m_ElapsedCycles = 0;
m_FrameFifoSize = static_cast<u32>(frame.fifoData.size());
// Determine start and end objects
u32 numObjects = (u32)(info.objectStarts.size());
u32 drawStart = std::min(numObjects, m_ObjectRangeStart);
u32 drawEnd = std::min(numObjects - 1, m_ObjectRangeEnd);
u32 memory_update = 0;
u32 object_num = 0;
u32 position = 0;
u32 memoryUpdate = 0;
// Skip memory updates during frame if true
// Skip all memory updates if early memory updates are enabled, as we already wrote them
if (m_EarlyMemoryUpdates)
{
memoryUpdate = (u32)(frame.memoryUpdates.size());
memory_update = (u32)(frame.memoryUpdates.size());
}
if (numObjects > 0)
for (const FramePart& part : info.parts)
{
u32 objectNum = 0;
bool show_part;
// Write fifo data skipping objects before the draw range
while (objectNum < drawStart)
if (part.m_type == FramePartType::PrimitiveData)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd;
object_num++;
}
else
{
// We always include commands and EFB copies, as commands from earlier objects still apply to
// later ones (games generally do not reconfigure everything for each object)
show_part = true;
}
// Write objects in draw range
if (objectNum < numObjects && drawStart <= drawEnd)
{
objectNum = drawEnd;
WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
}
// Write fifo data skipping objects after the draw range
while (objectNum < numObjects)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
}
if (show_part)
WriteFramePart(part, &memory_update, frame);
}
// Write data after the last object
WriteFramePart(position, static_cast<u32>(frame.fifoData.size()), memoryUpdate, frame, info);
FlushWGP();
// Sleep while the GPU is active
@ -321,36 +431,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
}
}
void FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate,
const FifoFrameInfo& frame, const AnalyzedFrameInfo& info)
void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update,
const FifoFrameInfo& frame)
{
const u8* const data = frame.fifoData.data();
while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd)
{
const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate];
u32 data_start = part.m_start;
const u32 data_end = part.m_end;
if (memUpdate.fifoPosition < dataEnd)
while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end)
{
const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update];
if (memUpdate.fifoPosition < data_end)
{
if (dataStart < memUpdate.fifoPosition)
if (data_start < memUpdate.fifoPosition)
{
WriteFifo(data, dataStart, memUpdate.fifoPosition);
dataStart = memUpdate.fifoPosition;
WriteFifo(data, data_start, memUpdate.fifoPosition);
data_start = memUpdate.fifoPosition;
}
WriteMemory(memUpdate);
++nextMemUpdate;
++*next_mem_update;
}
else
{
WriteFifo(data, dataStart, dataEnd);
dataStart = dataEnd;
WriteFifo(data, data_start, data_end);
data_start = data_end;
}
}
if (dataStart < dataEnd)
WriteFifo(data, dataStart, dataEnd);
if (data_start < data_end)
WriteFifo(data, data_start, data_end);
}
void FifoPlayer::WriteAllMemoryUpdates()

View File

@ -5,16 +5,18 @@
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/PowerPC/CPUCoreBase.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/OpcodeDecoding.h"
class FifoDataFile;
struct MemoryUpdate;
struct AnalyzedFrameInfo;
namespace CPU
{
@ -43,16 +45,46 @@ enum class State;
// 8. The output of fifoplayer would be wrong.
// To keep compatibility with old fifologs, we have this flag which signals texture cache to not
// bother
// hashing the memory and just assume the hash matched.
// bother hashing the memory and just assume the hash matched.
// At a later point proper efb copy support should be added to fiforecorder and this flag will
// change
// based on the version of the .dff file, but until then it will always be true when a fifolog is
// playing.
// change based on the version of the .dff file, but until then it will always be true when a
// fifolog is playing.
// Shitty global to fix a shitty problem
extern bool IsPlayingBackFifologWithBrokenEFBCopies;
// Kind of a contiguous section of a frame's fifo data.
enum class FramePartType
{
// Commands that precede a run of primitive data
Commands,
// Primitive (draw) data for one object
PrimitiveData,
// Section ending with the command that triggers an EFB copy
EFBCopy,
};
// Immutable description of one section of a frame: its type, its range within the frame's fifo
// data (m_start and m_end are offsets) and a copy of the CP state associated with it.
struct FramePart
{
constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
: m_type(type), m_start(start), m_end(end), m_cpmem(cpmem)
{
}
const FramePartType m_type;
const u32 m_start;
const u32 m_end;
const CPState m_cpmem;
};
// Result of analyzing one frame: its parts in the order they were added, plus a running count of
// parts per type.
struct AnalyzedFrameInfo
{
std::vector<FramePart> parts;
// NOTE(review): EFBCopy is presumably passed as the last enumerator so the map covers every
// FramePartType - confirm against Common::EnumMap.
Common::EnumMap<u32, FramePartType::EFBCopy> part_type_counts;
// Appends a part and increments the counter for its type.
void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
{
parts.emplace_back(type, start, end, cpmem);
part_type_counts[type]++;
}
};
class FifoPlayer
{
public:
@ -102,14 +134,12 @@ public:
private:
class CPUCore;
FifoPlayer();
CPU::State AdvanceFrame();
void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info);
void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame,
const AnalyzedFrameInfo& info);
void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame);
void WriteAllMemoryUpdates();
void WriteMemory(const MemoryUpdate& memUpdate);

View File

@ -1,103 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include <algorithm>
#include "Common/MsgHandler.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
using namespace FifoAnalyzer;
void FifoRecordAnalyzer::Initialize(const u32* cpMem)
{
s_DrawingObject = false;
FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem);
FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem);
for (u32 i = 0; i < CP_NUM_VAT_REG; ++i)
FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem);
const u32* const bases_start = cpMem + ARRAY_BASE;
const u32* const bases_end = bases_start + s_CpMem.arrayBases.size();
std::copy(bases_start, bases_end, s_CpMem.arrayBases.begin());
const u32* const strides_start = cpMem + ARRAY_STRIDE;
const u32* const strides_end = strides_start + s_CpMem.arrayStrides.size();
std::copy(strides_start, strides_end, s_CpMem.arrayStrides.begin());
}
// Tells the recorder which memory an indexed XF load reads.
//
// val is the 32-bit operand of the load: the upper 16 bits are the element index and bits 12-15
// hold (number of 32-bit words - 1). array selects the CP array whose base and stride are used.
void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array)
{
  const int element = val >> 16;
  const int word_count = ((val >> 12) & 0xF) + 1;

  const u32 stride = s_CpMem.arrayStrides[array];
  const u32 source = s_CpMem.arrayBases[array] + stride * element;

  FifoRecorder::GetInstance().UseMemory(source, word_count * 4, MemoryUpdate::XF_DATA);
}
void FifoRecordAnalyzer::WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize,
int numVertices)
{
// Skip if not indexed array
VertexComponentFormat arrayType;
if (arrayIndex == ARRAY_POSITION)
arrayType = s_CpMem.vtxDesc.low.Position;
else if (arrayIndex == ARRAY_NORMAL)
arrayType = s_CpMem.vtxDesc.low.Normal;
else if (arrayIndex >= ARRAY_COLOR0 && arrayIndex < ARRAY_COLOR0 + NUM_COLOR_ARRAYS)
arrayType = s_CpMem.vtxDesc.low.Color[arrayIndex - ARRAY_COLOR0];
else if (arrayIndex >= ARRAY_TEXCOORD0 && arrayIndex < ARRAY_TEXCOORD0 + NUM_TEXCOORD_ARRAYS)
arrayType = s_CpMem.vtxDesc.high.TexCoord[arrayIndex - ARRAY_TEXCOORD0];
else
{
PanicAlertFmt("Invalid arrayIndex {}", arrayIndex);
return;
}
if (!IsIndexed(arrayType))
return;
int maxIndex = 0;
// Determine min and max indices
if (arrayType == VertexComponentFormat::Index8)
{
for (int i = 0; i < numVertices; ++i)
{
int index = *vertexData;
vertexData += vertexSize;
// 0xff skips the vertex
if (index != 0xff)
{
if (index > maxIndex)
maxIndex = index;
}
}
}
else
{
for (int i = 0; i < numVertices; ++i)
{
int index = Common::swap16(vertexData);
vertexData += vertexSize;
// 0xffff skips the vertex
if (index != 0xffff)
{
if (index > maxIndex)
maxIndex = index;
}
}
}
u32 arrayStart = s_CpMem.arrayBases[arrayIndex];
u32 arraySize = s_CpMem.arrayStrides[arrayIndex] * (maxIndex + 1);
FifoRecorder::GetInstance().UseMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM);
}

View File

@ -1,15 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Common/CommonTypes.h"
namespace FifoRecordAnalyzer
{
// Must call this before analyzing Fifo commands with FifoAnalyzer::AnalyzeCommand()
void Initialize(const u32* cpMem);
void ProcessLoadIndexedXf(u32 val, int array);
void WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, int numVertices);
} // namespace FifoRecordAnalyzer

View File

@ -6,13 +6,168 @@
#include <algorithm>
#include <cstring>
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/Thread.h"
#include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/XFStructs.h"
// OpcodeDecoder callback used while recording: it tracks CP state and reports to the owning
// FifoRecorder the memory referenced by indexed loads and indexed vertex components.
class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback
{
public:
// Leaves the CP state default-constructed.
explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {}
// Constructs the CP state from cpmem.
explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem)
: m_owner(owner), m_cpmem(cpmem)
{
}
// XF loads are ignored.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP writes are applied to the tracked CP state.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
// BP writes are ignored.
OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size));
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
// Display list calls are only logged; the contents of the list are not processed here.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
WARN_LOG_FMT(VIDEO,
"Unhandled display list call {:08x} {:08x}; should have been inlined earlier",
address, size);
}
OPCODE_CALLBACK(void OnNop(u32 count)) {}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
private:
// Reports the array range used by one vertex component, if that component is indexed.
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices,
const u8* vertex_data);
// Recorder that receives the UseMemory notifications.
FifoRecorder* const m_owner;
// CP state updated by OnCP.
CPState m_cpmem;
};
// Reports the memory read by an indexed load: the element at `index` of the given CP array,
// `size` 32-bit words long.
void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)
{
  const u32 element_offset = m_cpmem.array_strides[array] * index;
  const u32 source = m_cpmem.array_bases[array] + element_offset;

  m_owner->UseMemory(source, size * sizeof(u32), MemoryUpdate::XF_DATA);
}
// TODO: The following code is copied with modifications from VertexLoaderBase.
// Surely there's a better solution?
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
// Walks the components of the current vertex format and, for every indexed component, reports
// the referenced array range to the recorder.
//
// vat selects the vertex attribute table entry; vertex_size is the size of one vertex in bytes;
// num_vertices and vertex_data describe the primitive's vertex data. The running offset of each
// component within a vertex is computed here and must add up to vertex_size.
void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive,
                                                          u8 vat, u32 vertex_size, u16 num_vertices,
                                                          const u8* vertex_data)
{
  const auto& vtx_desc = m_cpmem.vtx_desc;
  const auto& vtx_attr = m_cpmem.vtx_attr[vat];

  u32 offset = 0;

  // Each enabled matrix index takes one byte at the start of the vertex.
  if (vtx_desc.low.PosMatIdx)
    offset++;
  for (auto texmtxidx : vtx_desc.low.TexMatIdx)
  {
    if (texmtxidx)
      offset++;
  }

  const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
                                                      vtx_attr.g0.PosElements);
  ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size,
                         num_vertices, vertex_data);
  offset += pos_size;

  // Each component below is checked with its own descriptor entry: whether it is indexed, and
  // with which index width, is independent of how the position is stored.
  const u32 norm_size =
      VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
                                   vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
  ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Normal, offset, vertex_size, num_vertices,
                         vertex_data);
  offset += norm_size;

  for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
  {
    const u32 color_size =
        VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
    ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Color[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += color_size;
  }

  for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
  {
    const u32 tc_size = VertexLoader_TextCoord::GetSize(
        vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
    ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.high.TexCoord[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += tc_size;
  }

  ASSERT(offset == vertex_size);
}
// For an indexed vertex component, the array it indexes into must be saved with the recording.
//
// Scans the component's index in every vertex of the primitive (component_offset bytes into each
// vertex, vertices vertex_size bytes apart), finds the highest index used, and reports the array
// range from the array base up to and including that element. Non-indexed components are
// ignored.
void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index,
                                                              VertexComponentFormat array_type,
                                                              u32 component_offset, u32 vertex_size,
                                                              u16 num_vertices,
                                                              const u8* vertex_data)
{
  if (!IsIndexed(array_type))
    return;

  // Indices are either one byte or two bytes wide; the all-ones value skips the vertex.
  const bool byte_indices = array_type == VertexComponentFormat::Index8;
  const u16 skip_marker = byte_indices ? 0xff : 0xffff;

  u16 highest = 0;
  const u8* vertex = vertex_data;
  for (u16 n = 0; n < num_vertices; n++, vertex += vertex_size)
  {
    const u16 current =
        byte_indices ? vertex[component_offset] : Common::swap16(&vertex[component_offset]);
    if (current != skip_marker && current > highest)
      highest = current;
  }

  const u32 region_start = m_cpmem.array_bases[array_index];
  const u32 region_size = m_cpmem.array_strides[array_index] * (highest + 1);
  m_owner->UseMemory(region_start, region_size, MemoryUpdate::VERTEX_STREAM);
}
static FifoRecorder instance;
FifoRecorder::FifoRecorder() = default;
@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size)
{
// Assumes data contains all information for the command
// Calls FifoRecorder::UseMemory
const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record);
const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer);
// Make sure FifoPlayer's command analyzer agrees about the size of the command.
if (analyzed_size != size)
@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32*
memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE);
}
FifoRecordAnalyzer::Initialize(cpMem);
m_record_analyzer = std::make_unique<FifoRecordAnalyzer>(this, cpMem);
}
bool FifoRecorder::IsRecording() const

View File

@ -8,6 +8,7 @@
#include <mutex>
#include <vector>
#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"
class FifoRecorder
@ -47,6 +48,8 @@ public:
static FifoRecorder& GetInstance();
private:
class FifoRecordAnalyzer;
// Accessed from both GUI and video threads
std::recursive_mutex m_mutex;
@ -65,6 +68,7 @@ private:
bool m_SkipFutureData = true;
bool m_FrameEnded = false;
FifoFrameInfo m_CurrentFrame;
std::unique_ptr<FifoRecordAnalyzer> m_record_analyzer;
std::vector<u8> m_FifoData;
std::vector<u8> m_Ram;
std::vector<u8> m_ExRam;

View File

@ -217,11 +217,8 @@
<ClInclude Include="Core\DSP\Jit\x64\DSPJitTables.h" />
<ClInclude Include="Core\DSP\LabelMap.h" />
<ClInclude Include="Core\DSPEmulator.h" />
<ClInclude Include="Core\FifoPlayer\FifoAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlaybackAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecordAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
<ClInclude Include="Core\FreeLookConfig.h" />
<ClInclude Include="Core\FreeLookManager.h" />
@ -815,11 +812,8 @@
<ClCompile Include="Core\DSP\Jit\x64\DSPJitUtil.cpp" />
<ClCompile Include="Core\DSP\LabelMap.cpp" />
<ClCompile Include="Core\DSPEmulator.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlaybackAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecordAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
<ClCompile Include="Core\FreeLookConfig.cpp" />
<ClCompile Include="Core\FreeLookManager.cpp" />

View File

@ -3,6 +3,8 @@
#include "DolphinQt/FIFO/FIFOAnalyzer.h"
#include <algorithm>
#include <QGroupBox>
#include <QHBoxLayout>
#include <QHeaderView>
@ -27,8 +29,12 @@
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/XFStructs.h"
// Values range from 0 to number of frames - 1
constexpr int FRAME_ROLE = Qt::UserRole;
constexpr int OBJECT_ROLE = Qt::UserRole + 1;
// Values range from 0 to number of parts - 1
constexpr int PART_START_ROLE = Qt::UserRole + 1;
// Index of the last part belonging to the item (inclusive); values range from 0 to number of parts - 1
constexpr int PART_END_ROLE = Qt::UserRole + 2;
FIFOAnalyzer::FIFOAnalyzer()
{
@ -144,43 +150,175 @@ void FIFOAnalyzer::UpdateTree()
auto* file = FifoPlayer::GetInstance().GetFile();
const u32 frame_count = file->GetFrameCount();
for (u32 frame = 0; frame < frame_count; frame++)
{
auto* frame_item = new QTreeWidgetItem({tr("Frame %1").arg(frame)});
recording_item->addChild(frame_item);
const u32 object_count = FifoPlayer::GetInstance().GetFrameObjectCount(frame);
for (u32 object = 0; object < object_count; object++)
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame);
ASSERT(frame_info.parts.size() != 0);
Common::EnumMap<u32, FramePartType::EFBCopy> part_counts;
u32 part_start = 0;
for (u32 part_nr = 0; part_nr < frame_info.parts.size(); part_nr++)
{
auto* object_item = new QTreeWidgetItem({tr("Object %1").arg(object)});
const auto& part = frame_info.parts[part_nr];
frame_item->addChild(object_item);
const u32 part_type_nr = part_counts[part.m_type];
part_counts[part.m_type]++;
object_item->setData(0, FRAME_ROLE, frame);
object_item->setData(0, OBJECT_ROLE, object);
QTreeWidgetItem* object_item = nullptr;
if (part.m_type == FramePartType::PrimitiveData)
object_item = new QTreeWidgetItem({tr("Object %1").arg(part_type_nr)});
else if (part.m_type == FramePartType::EFBCopy)
object_item = new QTreeWidgetItem({tr("EFB copy %1").arg(part_type_nr)});
// We don't create dedicated labels for FramePartType::Command;
// those are grouped with the primitive
if (object_item != nullptr)
{
frame_item->addChild(object_item);
object_item->setData(0, FRAME_ROLE, frame);
object_item->setData(0, PART_START_ROLE, part_start);
object_item->setData(0, PART_END_ROLE, part_nr);
part_start = part_nr + 1;
}
}
// We shouldn't end on a Command (it should end with an EFB copy)
ASSERT(part_start == frame_info.parts.size());
// The counts we computed should match the frame's counts
ASSERT(std::equal(frame_info.part_type_counts.begin(), frame_info.part_type_counts.end(),
part_counts.begin()));
}
}
static std::string GetPrimitiveName(u8 cmd)
namespace
{
if ((cmd & 0xC0) != 0x80)
class DetailCallback : public OpcodeDecoder::Callback
{
public:
explicit DetailCallback(CPState cpmem) : m_cpmem(cpmem) {}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
PanicAlertFmt("Not a primitive command: {:#04x}", cmd);
return "";
// Note: No need to update m_cpmem as it already has the final value for this object
const auto [name, desc] = GetCPRegInfo(command, value);
ASSERT(!name.empty());
text = QStringLiteral("CP %1 %2 %3")
.arg(command, 2, 16, QLatin1Char('0'))
.arg(value, 8, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
const u8 vat = cmd & OpcodeDecoder::GX_VAT_MASK; // Vertex loader index (0 - 7)
const u8 primitive =
(cmd & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT;
static constexpr std::array<const char*, 8> names = {
"GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)",
"GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP",
"GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES",
"GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS",
};
return fmt::format("{} VAT {}", names[primitive], vat);
}
// Formats an XF transfer as "XF <command word> <value> <value> ...  <register name>".
// - address: XF address the transfer starts at
// - count:   number of 32-bit values in the transfer (one is read from data per loop iteration)
// - data:    the big-endian values themselves
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
{
  const auto [name, desc] = GetXFTransferInfo(address, count, data);
  ASSERT(!name.empty());

  // Reconstruct the command word as it appears in the FIFO: the length field (bits 16-19) holds
  // the number of values minus one, so it has to be converted back from the actual count.
  const u32 command = address | (((count - 1u) & 0xf) << 16);

  text = QStringLiteral("XF %1 ").arg(command, 8, 16, QLatin1Char('0'));

  for (u8 i = 0; i < count; i++)
  {
    const u32 value = Common::swap32(&data[i * 4]);

    text += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0'));
  }

  text += QStringLiteral(" ") + QString::fromStdString(name);
}
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
const auto [name, desc] = GetBPRegInfo(command, value);
ASSERT(!name.empty());
text = QStringLiteral("BP %1 %2 %3")
.arg(command, 2, 16, QLatin1Char('0'))
.arg(value, 6, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size);
text = QStringLiteral("LOAD INDX %1 %2")
.arg(QString::fromStdString(fmt::to_string(array)))
.arg(QString::fromStdString(desc));
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
const auto name = fmt::to_string(primitive);
// Note that vertex_count is allowed to be 0, with no special treatment
// (another command just comes right after the current command, with no vertices in between)
const u32 object_prim_size = num_vertices * vertex_size;
const u8 opcode =
0x80 | (static_cast<u8>(primitive) << OpcodeDecoder::GX_PRIMITIVE_SHIFT) | vat;
text = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes")
.arg(QString::fromStdString(name))
.arg(opcode, 2, 16, QLatin1Char('0'))
.arg(num_vertices)
.arg(vertex_size)
.arg(object_prim_size);
// It's not really useful to have a massive unreadable hex string for the object primitives.
// Put it in the description instead.
// #define INCLUDE_HEX_IN_PRIMITIVES
#ifdef INCLUDE_HEX_IN_PRIMITIVES
text += QStringLiteral(" ");
for (u32 i = 0; i < object_prim_size; i++)
{
text += QStringLiteral("%1").arg(vertex_data[i], 2, 16, QLatin1Char('0'));
}
#endif
}
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
text = QObject::tr("Call display list at %1 with size %2")
.arg(address, 8, 16, QLatin1Char('0'))
.arg(size, 8, 16, QLatin1Char('0'));
}
// Labels a run of NOPs; the repeat count is only shown when there is more than one.
OPCODE_CALLBACK(void OnNop(u32 count))
{
  text = (count > 1) ? QStringLiteral("NOP (%1x)").arg(count) : QStringLiteral("NOP");
}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
using OpcodeDecoder::Opcode;
if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS)
text = QStringLiteral("GX_CMD_UNKNOWN_METRICS");
else if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_INVL_VC)
text = QStringLiteral("GX_CMD_INVL_VC");
else
text = QStringLiteral("Unknown opcode %1").arg(opcode, 2, 16);
}
// Intentionally empty: nothing is done with the raw command bytes here.
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
// Gives access to the CP state stored by the constructor.
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Label produced by the most recently invoked callback; read by the caller after each command.
QString text;
// Copy of the CP state passed to the constructor.
CPState m_cpmem;
};
} // namespace
void FIFOAnalyzer::UpdateDetails()
{
@ -200,205 +338,40 @@ void FIFOAnalyzer::UpdateDetails()
const auto items = m_tree_widget->selectedItems();
if (items.isEmpty() || items[0]->data(0, OBJECT_ROLE).isNull())
if (items.isEmpty() || items[0]->data(0, PART_START_ROLE).isNull())
return;
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt();
const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const auto& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const auto& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
// Note that frame_info.objectStarts[object_nr] is the start of the primitive data,
// but we want to start with the register updates which happen before that.
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]);
const u32 object_size = frame_info.objectEnds[object_nr] - object_start;
const u8* const object = &fifo_frame.fifoData[object_start];
const u32 object_start = frame_info.parts[start_part_nr].m_start;
const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_size = object_end - object_start;
u32 object_offset = 0;
// NOTE: object_info.m_cpmem is the state of cpmem _after_ all of the commands in this object.
// However, it doesn't matter that it doesn't match the start, since it will match by the time
// primitives are reached.
auto callback = DetailCallback(frame_info.parts[end_part_nr].m_cpmem);
while (object_offset < object_size)
{
QString new_label;
const u32 start_offset = object_offset;
m_object_data_offsets.push_back(start_offset);
const u8 command = object[object_offset++];
switch (command)
{
case OpcodeDecoder::GX_NOP:
if (object[object_offset] == OpcodeDecoder::GX_NOP)
{
u32 nop_count = 2;
while (object[++object_offset] == OpcodeDecoder::GX_NOP)
nop_count++;
object_offset += OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + start_offset],
object_size - start_offset, callback);
new_label = QStringLiteral("NOP (%1x)").arg(nop_count);
}
else
{
new_label = QStringLiteral("NOP");
}
break;
case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS:
new_label = QStringLiteral("GX_CMD_UNKNOWN_METRICS");
break;
case OpcodeDecoder::GX_CMD_INVL_VC:
new_label = QStringLiteral("GX_CMD_INVL_VC");
break;
case OpcodeDecoder::GX_LOAD_CP_REG:
{
const u8 cmd2 = object[object_offset++];
const u32 value = Common::swap32(&object[object_offset]);
object_offset += 4;
const auto [name, desc] = GetCPRegInfo(cmd2, value);
ASSERT(!name.empty());
new_label = QStringLiteral("CP %1 %2 %3")
.arg(cmd2, 2, 16, QLatin1Char('0'))
.arg(value, 8, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
break;
case OpcodeDecoder::GX_LOAD_XF_REG:
{
const auto [name, desc] = GetXFTransferInfo(&object[object_offset]);
const u32 cmd2 = Common::swap32(&object[object_offset]);
object_offset += 4;
ASSERT(!name.empty());
const u8 stream_size = ((cmd2 >> 16) & 15) + 1;
new_label = QStringLiteral("XF %1 ").arg(cmd2, 8, 16, QLatin1Char('0'));
for (u8 i = 0; i < stream_size; i++)
{
const u32 value = Common::swap32(&object[object_offset]);
object_offset += 4;
new_label += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0'));
}
new_label += QStringLiteral(" ") + QString::fromStdString(name);
}
break;
case OpcodeDecoder::GX_LOAD_INDX_A:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_B:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_C:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_LOAD_INDX_D:
{
const auto [desc, written] =
GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(&object[object_offset]));
object_offset += 4;
new_label = QStringLiteral("LOAD INDX D %1").arg(QString::fromStdString(desc));
}
break;
case OpcodeDecoder::GX_CMD_CALL_DL:
// The recorder should have expanded display lists into the fifo stream and skipped the
// call to start them
// That is done to make it easier to track where memory is updated
ASSERT(false);
object_offset += 8;
new_label = QStringLiteral("CALL DL");
break;
case OpcodeDecoder::GX_LOAD_BP_REG:
{
const u8 cmd2 = object[object_offset++];
const u32 cmddata = Common::swap24(&object[object_offset]);
object_offset += 3;
const auto [name, desc] = GetBPRegInfo(cmd2, cmddata);
ASSERT(!name.empty());
new_label = QStringLiteral("BP %1 %2 %3")
.arg(cmd2, 2, 16, QLatin1Char('0'))
.arg(cmddata, 6, 16, QLatin1Char('0'))
.arg(QString::fromStdString(name));
}
break;
default:
if ((command & 0xC0) == 0x80)
{
// Object primitive data
const u8 vat = command & OpcodeDecoder::GX_VAT_MASK;
const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc;
const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat];
const auto name = GetPrimitiveName(command);
const u16 vertex_count = Common::swap16(&object[object_offset]);
object_offset += 2;
const u32 vertex_size = VertexLoaderBase::GetVertexSize(vtx_desc, vtx_attr);
// Note that vertex_count is allowed to be 0, with no special treatment
// (another command just comes right after the current command, with no vertices in between)
const u32 object_prim_size = vertex_count * vertex_size;
new_label = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes")
.arg(QString::fromStdString(name))
.arg(command, 2, 16, QLatin1Char('0'))
.arg(vertex_count)
.arg(vertex_size)
.arg(object_prim_size);
// It's not really useful to have a massive unreadable hex string for the object primitives.
// Put it in the description instead.
// #define INCLUDE_HEX_IN_PRIMITIVES
#ifdef INCLUDE_HEX_IN_PRIMITIVES
new_label += QStringLiteral(" ");
for (u32 i = 0; i < object_prim_size; i++)
{
new_label += QStringLiteral("%1").arg(object[object_offset++], 2, 16, QLatin1Char('0'));
}
#else
object_offset += object_prim_size;
#endif
}
else
{
new_label = QStringLiteral("Unknown opcode %1").arg(command, 2, 16);
}
break;
}
new_label = QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) +
new_label;
QString new_label =
QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) +
callback.text;
m_detail_list->addItem(new_label);
}
ASSERT(object_offset == object_size);
// Needed to ensure the description updates when changing objects
m_detail_list->setCurrentRow(0);
}
@ -413,12 +386,15 @@ void FIFOAnalyzer::BeginSearch()
const auto items = m_tree_widget->selectedItems();
if (items.isEmpty() || items[0]->data(0, FRAME_ROLE).isNull() ||
items[0]->data(0, OBJECT_ROLE).isNull())
items[0]->data(0, PART_START_ROLE).isNull())
{
m_search_label->setText(tr("Invalid search parameters (no object selected)"));
return;
}
// Having PART_START_ROLE indicates that this is valid
const int object_idx = items[0]->parent()->indexOfChild(items[0]);
// TODO: Remove even string length limit
if (search_str.length() % 2)
{
@ -449,13 +425,15 @@ void FIFOAnalyzer::BeginSearch()
m_search_results.clear();
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt();
const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]);
const u32 object_size = frame_info.objectEnds[object_nr] - object_start;
const u32 object_start = frame_info.parts[start_part_nr].m_start;
const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_size = object_end - object_start;
const u8* const object = &fifo_frame.fifoData[object_start];
@ -474,7 +452,7 @@ void FIFOAnalyzer::BeginSearch()
{
if (std::equal(search_val.begin(), search_val.end(), ptr))
{
m_search_results.emplace_back(frame_nr, object_nr, cmd_nr);
m_search_results.emplace_back(frame_nr, object_idx, cmd_nr);
break;
}
}
@ -528,7 +506,7 @@ void FIFOAnalyzer::ShowSearchResult(size_t index)
const auto& result = m_search_results[index];
QTreeWidgetItem* object_item =
m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object);
m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object_idx);
m_tree_widget->setCurrentItem(object_item);
m_detail_list->setCurrentRow(result.m_cmd);
@ -537,6 +515,225 @@ void FIFOAnalyzer::ShowSearchResult(size_t index)
m_search_previous->setEnabled(index > 0);
}
namespace
{
// TODO: Not sure whether we should bother translating the descriptions
class DescriptionCallback : public OpcodeDecoder::Callback
{
public:
explicit DescriptionCallback(const CPState& cpmem) : m_cpmem(cpmem) {}
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
const auto [name, desc] = GetBPRegInfo(command, value);
ASSERT(!name.empty());
text = QObject::tr("BP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
// Note: No need to update m_cpmem as it already has the final value for this object
const auto [name, desc] = GetCPRegInfo(command, value);
ASSERT(!name.empty());
text = QObject::tr("CP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
{
const auto [name, desc] = GetXFTransferInfo(address, count, data);
ASSERT(!name.empty());
text = QObject::tr("XF register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += QObject::tr("No description available");
else
text += QString::fromStdString(desc);
}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size);
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
switch (array)
{
case CPArray::XF_A:
text += QObject::tr("Usually used for position matrices");
break;
case CPArray::XF_B:
// i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal"
// does not have its usual meaning here, but rather the meaning of "perpendicular to a
// surface".
text += QObject::tr("Usually used for normal matrices");
break;
case CPArray::XF_C:
// i18n: Tex coord is short for texture coordinate
text += QObject::tr("Usually used for tex coord matrices");
break;
case CPArray::XF_D:
text += QObject::tr("Usually used for light objects");
break;
default:
break;
}
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
const auto name = fmt::format("{} VAT {}", primitive, vat);
// i18n: In this context, a primitive means a point, line, triangle or rectangle.
// Do not translate the word primitive as if it was an adjective.
text = QObject::tr("Primitive %1").arg(QString::fromStdString(name));
text += QLatin1Char{'\n'};
const auto& vtx_desc = m_cpmem.vtx_desc;
const auto& vtx_attr = m_cpmem.vtx_attr[vat];
u32 i = 0;
const auto process_component = [&](VertexComponentFormat cformat, ComponentFormat format,
u32 non_indexed_count, u32 indexed_count = 1) {
u32 count;
if (cformat == VertexComponentFormat::NotPresent)
return;
else if (cformat == VertexComponentFormat::Index8)
{
format = ComponentFormat::UByte;
count = indexed_count;
}
else if (cformat == VertexComponentFormat::Index16)
{
format = ComponentFormat::UShort;
count = indexed_count;
}
else
{
count = non_indexed_count;
}
const u32 component_size = GetElementSize(format);
for (u32 j = 0; j < count; j++)
{
for (u32 component_off = 0; component_off < component_size; component_off++)
{
text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0'));
}
if (format == ComponentFormat::Float)
{
const float value = Common::BitCast<float>(Common::swap32(&vertex_data[i]));
text += QStringLiteral(" (%1)").arg(value);
}
i += component_size;
text += QLatin1Char{' '};
}
text += QLatin1Char{' '};
};
const auto process_simple_component = [&](u32 size) {
for (u32 component_off = 0; component_off < size; component_off++)
{
text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0'));
}
i += size;
text += QLatin1Char{' '};
text += QLatin1Char{' '};
};
for (u32 vertex_num = 0; vertex_num < num_vertices; vertex_num++)
{
ASSERT(i == vertex_num * vertex_size);
text += QLatin1Char{'\n'};
if (vtx_desc.low.PosMatIdx)
process_simple_component(1);
for (auto texmtxidx : vtx_desc.low.TexMatIdx)
{
if (texmtxidx)
process_simple_component(1);
}
process_component(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
vtx_attr.g0.PosElements == CoordComponentCount::XY ? 2 : 3);
// TODO: Is this calculation correct?
const u32 normal_component_count =
vtx_desc.low.Normal == VertexComponentFormat::Direct ? 3 : 1;
const u32 normal_elements = vtx_attr.g0.NormalElements == NormalComponentCount::NBT ? 3 : 1;
process_component(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
normal_component_count * normal_elements,
vtx_attr.g0.NormalIndex3 ? normal_elements : 1);
for (u32 c = 0; c < vtx_desc.low.Color.Size(); c++)
{
static constexpr Common::EnumMap<u32, ColorFormat::RGBA8888> component_sizes = {
2, // RGB565
3, // RGB888
4, // RGB888x
2, // RGBA4444
3, // RGBA6666
4, // RGBA8888
};
switch (vtx_desc.low.Color[c])
{
case VertexComponentFormat::Index8:
process_simple_component(1);
break;
case VertexComponentFormat::Index16:
process_simple_component(2);
break;
case VertexComponentFormat::Direct:
process_simple_component(component_sizes[vtx_attr.GetColorFormat(c)]);
break;
}
}
for (u32 t = 0; t < vtx_desc.high.TexCoord.Size(); t++)
{
process_component(vtx_desc.high.TexCoord[t], vtx_attr.GetTexFormat(t),
vtx_attr.GetTexElements(t) == TexComponentCount::ST ? 2 : 1);
}
}
}
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
text = QObject::tr("No description available");
}
OPCODE_CALLBACK(void OnNop(u32 count)) { text = QObject::tr("No description available"); }
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
text = QObject::tr("No description available");
}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
QString text;
CPState m_cpmem;
};
} // namespace
void FIFOAnalyzer::UpdateDescription()
{
m_entry_detail_browser->clear();
@ -549,148 +746,24 @@ void FIFOAnalyzer::UpdateDescription()
if (items.isEmpty() || m_object_data_offsets.empty())
return;
if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, OBJECT_ROLE).isNull())
if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, PART_START_ROLE).isNull())
return;
const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt();
const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt();
const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt();
const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt();
const u32 entry_nr = m_detail_list->currentRow();
const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr);
const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr);
const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]);
const u32 object_start = frame_info.parts[start_part_nr].m_start;
const u32 object_end = frame_info.parts[end_part_nr].m_end;
const u32 object_size = object_end - object_start;
const u32 entry_start = m_object_data_offsets[entry_nr];
const u8* cmddata = &fifo_frame.fifoData[object_start + entry_start];
// TODO: Not sure whether we should bother translating the descriptions
QString text;
if (*cmddata == OpcodeDecoder::GX_LOAD_BP_REG)
{
const u8 cmd = *(cmddata + 1);
const u32 value = Common::swap24(cmddata + 2);
const auto [name, desc] = GetBPRegInfo(cmd, value);
ASSERT(!name.empty());
text = tr("BP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_CP_REG)
{
const u8 cmd = *(cmddata + 1);
const u32 value = Common::swap32(cmddata + 2);
const auto [name, desc] = GetCPRegInfo(cmd, value);
ASSERT(!name.empty());
text = tr("CP register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_XF_REG)
{
const auto [name, desc] = GetXFTransferInfo(cmddata + 1);
ASSERT(!name.empty());
text = tr("XF register ");
text += QString::fromStdString(name);
text += QLatin1Char{'\n'};
if (desc.empty())
text += tr("No description available");
else
text += QString::fromStdString(desc);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_A)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
text += tr("Usually used for position matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_B)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
// i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal"
// does not have its usual meaning here, but rather the meaning of "perpendicular to a surface".
text += tr("Usually used for normal matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_C)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
// i18n: Tex coord is short for texture coordinate
text += tr("Usually used for tex coord matrices");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_D)
{
const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(cmddata + 1));
text = QString::fromStdString(desc);
text += QLatin1Char{'\n'};
text += tr("Usually used for light objects");
text += QLatin1Char{'\n'};
text += QString::fromStdString(written);
}
else if ((*cmddata & 0xC0) == 0x80)
{
const u8 vat = *cmddata & OpcodeDecoder::GX_VAT_MASK;
const QString name = QString::fromStdString(GetPrimitiveName(*cmddata));
const u16 vertex_count = Common::swap16(cmddata + 1);
// i18n: In this context, a primitive means a point, line, triangle or rectangle.
// Do not translate the word primitive as if it was an adjective.
text = tr("Primitive %1").arg(name);
text += QLatin1Char{'\n'};
const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc;
const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat];
const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr);
u32 i = 3;
for (u32 vertex_num = 0; vertex_num < vertex_count; vertex_num++)
{
text += QLatin1Char{'\n'};
for (u32 comp_size : component_sizes)
{
for (u32 comp_off = 0; comp_off < comp_size; comp_off++)
{
text += QStringLiteral("%1").arg(cmddata[i++], 2, 16, QLatin1Char('0'));
}
text += QLatin1Char{' '};
}
}
}
else
{
text = tr("No description available");
}
m_entry_detail_browser->setText(text);
auto callback = DescriptionCallback(frame_info.parts[end_part_nr].m_cpmem);
OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + entry_start],
object_size - entry_start, callback);
m_entry_detail_browser->setText(callback.text);
}

View File

@ -58,15 +58,19 @@ private:
struct SearchResult
{
constexpr SearchResult(u32 frame, u32 object, u32 cmd)
: m_frame(frame), m_object(object), m_cmd(cmd)
constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd)
: m_frame(frame), m_object_idx(object_idx), m_cmd(cmd)
{
}
const u32 m_frame;
const u32 m_object;
// Index in tree view. Does not correspond with object numbers or part numbers.
const u32 m_object_idx;
const u32 m_cmd;
};
// Offsets from the start of the first part in an object for each command within the currently
// selected object.
std::vector<int> m_object_data_offsets;
std::vector<SearchResult> m_search_results;
};

View File

@ -21,7 +21,6 @@
#include "Core/Core.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h"
@ -151,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets()
layout->addWidget(recording_group);
layout->addWidget(m_button_box);
QWidget* main_widget = new QWidget(this);
main_widget->setLayout(layout);
m_main_widget = new QWidget(this);
m_main_widget->setLayout(layout);
auto* tab_widget = new QTabWidget(this);
m_tab_widget = new QTabWidget(this);
m_analyzer = new FIFOAnalyzer;
tab_widget->addTab(main_widget, tr("Play / Record"));
tab_widget->addTab(m_analyzer, tr("Analyze"));
m_tab_widget->addTab(m_main_widget, tr("Play / Record"));
m_tab_widget->addTab(m_analyzer, tr("Analyze"));
auto* tab_layout = new QVBoxLayout;
tab_layout->addWidget(tab_widget);
tab_layout->addWidget(m_tab_widget);
setLayout(tab_layout);
}
@ -251,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped()
StopRecording();
UpdateControls();
// When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful
m_tab_widget->setCurrentWidget(m_main_widget);
m_analyzer->Update();
}

View File

@ -12,6 +12,7 @@ class QDialogButtonBox;
class QLabel;
class QPushButton;
class QSpinBox;
class QTabWidget;
class FIFOAnalyzer;
class FIFOPlayerWindow : public QWidget
@ -64,6 +65,9 @@ private:
QCheckBox* m_early_memory_updates;
QDialogButtonBox* m_button_box;
QWidget* m_main_widget;
QTabWidget* m_tab_widget;
FIFOAnalyzer* m_analyzer;
Core::State m_emu_state = Core::State::Uninitialized;
};

View File

@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool)
target_link_libraries(dolphin-tool
PRIVATE
core
discio
videocommon
cpp-optparse

View File

@ -3,6 +3,8 @@
#include <array>
#include "Common/EnumMap.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DRender.h"
#include "VideoBackends/D3D/D3DState.h"
@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<D3DVertexFormat>(vtx_decl);
}
static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = {
// float formats
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
// integer formats
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
};
DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer)
{
DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer];
using FormatMap = Common::EnumMap<DXGI_FORMAT, ComponentFormat::Float>;
static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper
static constexpr std::array<FormatMap, 4> d3d_float_format_lookup = {
f({
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
}),
};
static constexpr std::array<FormatMap, 4> d3d_integer_format_lookup = {
f({
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
};
DXGI_FORMAT retval =
integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t];
if (retval == DXGI_FORMAT_UNKNOWN)
{
PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer);

View File

@ -4,39 +4,43 @@
#include "VideoBackends/D3D12/DX12VertexFormat.h"
#include "Common/Assert.h"
#include "Common/EnumMap.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"
namespace DX12
{
static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer)
static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer)
{
using ComponentArray = std::array<DXGI_FORMAT, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper
// NOTE: 3-component formats are not valid.
static const DXGI_FORMAT float_type_lookup[][4] = {
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte
f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte
f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort
f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};
static const DXGI_FORMAT integer_type_lookup[][4] = {
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}), // UByte
f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}), // Byte
f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}), // UShort
f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};
ASSERT(components > 0 && components <= 4);

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/GL/GLUtil.h"
#include "Common/MsgHandler.h"
@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<GLVertexFormat>(vtx_decl);
}
static inline GLuint VarToGL(VarType t)
static inline GLuint VarToGL(ComponentFormat t)
{
static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT,
GL_FLOAT};
static constexpr Common::EnumMap<GLuint, ComponentFormat::Float> lookup = {
GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT,
};
return lookup[t];
}

View File

@ -36,20 +36,21 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
{
DebugUtil::OnObjectBegin();
u8 primitiveType = 0;
using OpcodeDecoder::Primitive;
Primitive primitive_type = Primitive::GX_DRAW_QUADS;
switch (m_current_primitive_type)
{
case PrimitiveType::Points:
primitiveType = OpcodeDecoder::GX_DRAW_POINTS;
primitive_type = Primitive::GX_DRAW_POINTS;
break;
case PrimitiveType::Lines:
primitiveType = OpcodeDecoder::GX_DRAW_LINES;
primitive_type = Primitive::GX_DRAW_LINES;
break;
case PrimitiveType::Triangles:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES;
primitive_type = Primitive::GX_DRAW_TRIANGLES;
break;
case PrimitiveType::TriangleStrip:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP;
primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP;
break;
}
@ -57,7 +58,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
if (g_renderer->IsBBoxEnabled())
g_renderer->BBoxFlush();
m_setup_unit.Init(primitiveType);
m_setup_unit.Init(primitive_type);
// set all states which are stored within video sw
for (int i = 0; i < 4; i++)
@ -74,7 +75,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex));
// parse the videocommon format to our own struct format (m_vertex)
SetFormat(g_main_cp_state.last_id, primitiveType);
SetFormat();
ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index);
// transform this vertex so that it can be used for rasterization (outVertex)
@ -98,7 +99,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
DebugUtil::OnObjectEnd();
}
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
void SWVertexLoader::SetFormat()
{
// matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
@ -144,7 +145,7 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
if (format.enable)
{
src.Skip(format.offset);
src.Skip(base_component * (1 << (format.type >> 1)));
src.Skip(base_component * GetElementSize(format.type));
int i;
for (i = 0; i < std::min(format.components - base_component, components); i++)
@ -152,24 +153,24 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
int i_dst = reverse ? components - i - 1 : i;
switch (format.type)
{
case VAR_UNSIGNED_BYTE:
case ComponentFormat::UByte:
dst[i_dst] = ReadNormalized<T, u8>(src.Read<u8, swap>());
break;
case VAR_BYTE:
case ComponentFormat::Byte:
dst[i_dst] = ReadNormalized<T, s8>(src.Read<s8, swap>());
break;
case VAR_UNSIGNED_SHORT:
case ComponentFormat::UShort:
dst[i_dst] = ReadNormalized<T, u16>(src.Read<u16, swap>());
break;
case VAR_SHORT:
case ComponentFormat::Short:
dst[i_dst] = ReadNormalized<T, s16>(src.Read<s16, swap>());
break;
case VAR_FLOAT:
case ComponentFormat::Float:
dst[i_dst] = ReadNormalized<T, float>(src.Read<float, swap>());
break;
}
ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT,
ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float,
"only non-float values are allowed to be streamed as integer");
}
for (; i < components; i++)

View File

@ -22,7 +22,7 @@ public:
protected:
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;
void SetFormat(u8 attributeIndex, u8 primitiveType);
void SetFormat();
void ParseVertex(const PortableVertexDeclaration& vdec, int index);
InputVertexData m_vertex{};

View File

@ -9,9 +9,9 @@
#include "VideoBackends/Software/Clipper.h"
#include "VideoCommon/OpcodeDecoding.h"
void SetupUnit::Init(u8 primitiveType)
void SetupUnit::Init(OpcodeDecoder::Primitive primitive_type)
{
m_PrimType = primitiveType;
m_PrimType = primitive_type;
m_VertexCounter = 0;
m_VertPointer[0] = &m_Vertices[0];
@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex()
void SetupUnit::SetupVertex()
{
using OpcodeDecoder::Primitive;
switch (m_PrimType)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS:
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS_2:
WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2");
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
SetupTriangle();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
SetupTriStrip();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
SetupTriFan();
break;
case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
SetupLine();
break;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
SetupLineStrip();
break;
case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
SetupPoint();
break;
}

View File

@ -6,9 +6,14 @@
#include "Common/CommonTypes.h"
#include "VideoBackends/Software/NativeVertexFormat.h"
// Opaque declaration of OpcodeDecoder::Primitive (fixed underlying type u8) so that
// this header can name the type in SetupUnit's interface without seeing its enumerators.
namespace OpcodeDecoder { enum class Primitive : u8; }
class SetupUnit
{
u8 m_PrimType = 0;
OpcodeDecoder::Primitive m_PrimType{};
int m_VertexCounter = 0;
OutputVertexData m_Vertices[3];
@ -24,7 +29,7 @@ class SetupUnit
void SetupPoint();
public:
void Init(u8 primitiveType);
void Init(OpcodeDecoder::Primitive primitive_type);
OutputVertexData* GetVertex();

View File

@ -4,6 +4,7 @@
#include "VideoBackends/Vulkan/VKVertexFormat.h"
#include "Common/Assert.h"
#include "Common/EnumMap.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ObjectCache.h"
@ -13,32 +14,35 @@
namespace Vulkan
{
static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer)
static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool integer)
{
static const VkFormat float_type_lookup[][4] = {
{VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
using ComponentArray = std::array<VkFormat, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}), // UByte
f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}), // Byte
f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}), // UShort
f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};
static const VkFormat integer_type_lookup[][4] = {
{VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}), // UByte
f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}), // Byte
f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}), // UShort
f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};
ASSERT(components > 0 && components <= 4);

View File

@ -258,7 +258,7 @@ enum class TevBias : u32
{
Zero = 0,
AddHalf = 1,
Subhalf = 2,
SubHalf = 2,
Compare = 3
};
template <>
@ -491,6 +491,94 @@ struct fmt::formatter<TevStageCombiner::ColorCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx)
{
auto out = ctx.out();
if (cc.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.
static constexpr Common::EnumMap<const char*, TevColorArg::Zero> alt_names = {
"prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa",
"tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0",
};
const bool has_d = cc.d != TevColorArg::Zero;
// If c is one, (1 - c) is zero, so (1-c)*a is zero
const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One;
// If either b or c is zero, b*c is zero
const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero;
const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = cc.scale != TevScale::Scale1;
const char op = (cc.op == TevOp::Sub ? '-' : '+');
if (cc.dest == TevOutput::Prev)
out = format_to(out, "dest.rgb = ");
else
out = format_to(out, "{:n}.rgb = ", cc.dest);
if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{}", alt_names[cc.d]);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (cc.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
if (cc.c == TevColorArg::Half)
{
// has_a and has_b imply that c is not Zero or One, and Half is the only remaining
// numeric constant. This results in an average.
out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]);
}
else
{
out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b],
alt_names[cc.c]);
}
}
else if (has_ac)
{
if (cc.c == TevColorArg::Zero)
out = format_to(out, "{}", alt_names[cc.a]);
else if (cc.c == TevColorArg::Half) // 1 - .5 is .5
out = format_to(out, ".5*{}", alt_names[cc.a]);
else
out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]);
}
else // has_bc
{
if (cc.c == TevColorArg::One)
out = format_to(out, "{}", alt_names[cc.b]);
else
out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, cc.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", cc.scale);
out = format_to(out, "\n\n");
}
return format_to(ctx.out(),
"a: {}\n"
"b: {}\n"
@ -512,7 +600,80 @@ struct fmt::formatter<TevStageCombiner::AlphaCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx)
{
return format_to(ctx.out(),
auto out = ctx.out();
if (ac.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.
// We don't need an alt_names map here, unlike the color combiner, as the only special term is
// Zero, and we filter that out below. However, we do need to append ".a" to all
// parameters, to make it explicit that these are operations on the alpha term instead of the
// 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown.
const bool has_d = ac.d != TevAlphaArg::Zero;
// There is no c value for alpha that results in (1 - c) always being zero
const bool has_ac = ac.a != TevAlphaArg::Zero;
// If either b or c is zero, b*c is zero
const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero;
const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = ac.scale != TevScale::Scale1;
const char op = (ac.op == TevOp::Sub ? '-' : '+');
if (ac.dest == TevOutput::Prev)
out = format_to(out, "dest.a = ");
else
out = format_to(out, "{:n}.a = ", ac.dest);
if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{:n}.a", ac.d);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (ac.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c);
}
else if (has_ac)
{
if (ac.c == TevAlphaArg::Zero)
out = format_to(out, "{:n}.a", ac.a);
else
out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a);
}
else // has_bc
{
out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, ac.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", ac.scale);
out = format_to(out, "\n\n");
}
return format_to(out,
"a: {}\n"
"b: {}\n"
"c: {}\n"
@ -756,14 +917,14 @@ struct fmt::formatter<LODType> : EnumFormatter<LODType::Diagonal>
formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {}
};
enum class MaxAnsio
enum class MaxAniso
{
One = 0,
Two = 1,
Four = 2,
};
template <>
struct fmt::formatter<MaxAnsio> : EnumFormatter<MaxAnsio::Four>
struct fmt::formatter<MaxAniso> : EnumFormatter<MaxAniso::Four>
{
formatter() : EnumFormatter({"1", "2", "4"}) {}
};
@ -777,7 +938,7 @@ union TexMode0
BitField<7, 1, FilterMode> min_filter;
BitField<8, 1, LODType> diag_lod;
BitField<9, 8, s32> lod_bias;
BitField<19, 2, MaxAnsio> max_aniso;
BitField<19, 2, MaxAniso> max_aniso;
BitField<21, 1, bool, u32> lod_clamp;
u32 hex;
};
@ -2205,7 +2366,7 @@ struct BPMemory
extern BPMemory bpmem;
void LoadBPReg(u32 value0, int cycles_into_future);
void LoadBPRegPreprocess(u32 value0, int cycles_into_future);
void LoadBPReg(u8 reg, u32 value, int cycles_into_future);
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future);
std::pair<std::string, std::string> GetBPRegInfo(u8 cmd, u32 cmddata);

View File

@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bp.newvalue);
}
// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg()
void LoadBPReg(u32 value0, int cycles_into_future)
// Call browser: OpcodeDecoding.cpp RunCallback::OnBP()
void LoadBPReg(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
int oldval = ((u32*)&bpmem)[regNum];
int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask);
int oldval = ((u32*)&bpmem)[reg];
int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask);
int changes = (oldval ^ newval) & 0xFFFFFF;
BPCmd bp = {regNum, changes, newval};
BPCmd bp = {reg, changes, newval};
// Reset the mask register if we're not trying to set it ourselves.
if (regNum != BPMEM_BP_MASK)
if (reg != BPMEM_BP_MASK)
bpmem.bpMask = 0xFFFFFF;
BPWritten(bp, cycles_into_future);
}
void LoadBPRegPreprocess(u32 value0, int cycles_into_future)
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
// masking could hypothetically be a problem
u32 newval = value0 & 0xffffff;
switch (regNum)
// masking via BPMEM_BP_MASK could hypothetically be a problem
u32 newval = value & 0xffffff;
switch (reg)
{
case BPMEM_SETDRAWDONE:
if ((newval & 0xff) == 0x02)

View File

@ -2,7 +2,14 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/CPMemory.h"
#include <cstring>
#include "Common/ChunkFile.h"
#include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/VertexLoaderManager.h"
// CP state
CPState g_main_cp_state;
@ -22,13 +29,13 @@ void DoCPState(PointerWrap& p)
if (p.mode == PointerWrap::MODE_READ)
{
CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true;
VertexLoaderManager::g_bases_dirty = true;
}
}
void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}
std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
@ -62,12 +69,167 @@ std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK),
fmt::to_string(UVAT_group2{.Hex = value}));
case ARRAY_BASE:
return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK),
fmt::format("Base address {:08x}", value));
return std::make_pair(
fmt::format("ARRAY_BASE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Base address {:08x}", value));
case ARRAY_STRIDE:
return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE),
fmt::format("Stride {:02x}", value & 0xff));
return std::make_pair(
fmt::format("ARRAY_STRIDE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Stride {:02x}", value & 0xff));
default:
return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), "");
}
}
// Builds a CP state from a flat register image that is indexed by CP register number.
// `memory` must be readable at every register index touched below (matrix indices,
// vertex descriptor, the three VAT groups, and the array base/stride registers).
CPState::CPState(const u32* memory) : CPState()
{
  // Single-register values.
  vtx_desc.low.Hex = memory[VCD_LO];
  vtx_desc.high.Hex = memory[VCD_HI];
  matrix_index_a.Hex = memory[MATINDEX_A];
  matrix_index_b.Hex = memory[MATINDEX_B];

  // Each vertex attribute table is spread over three register groups (A, B and C).
  const u32* const vat_group_a = &memory[CP_VAT_REG_A];
  const u32* const vat_group_b = &memory[CP_VAT_REG_B];
  const u32* const vat_group_c = &memory[CP_VAT_REG_C];
  for (u32 vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    auto& attr = vtx_attr[vat];
    attr.g0.Hex = vat_group_a[vat];
    attr.g1.Hex = vat_group_b[vat];
    attr.g2.Hex = vat_group_c[vat];
  }

  // Array base addresses and strides are stored per CPArray slot.
  for (u32 slot = 0; slot < CP_NUM_ARRAYS; ++slot)
  {
    const auto array = static_cast<CPArray>(slot);
    array_bases[array] = memory[ARRAY_BASE + slot];
    array_strides[array] = memory[ARRAY_STRIDE + slot];
  }
}
// Applies a single CP register write to this state.
//
// sub_cmd: the CP register number; it is masked with CP_COMMAND_MASK to select the
//          register group, and the remaining bits select the VAT / array within a group.
// value:   the 32-bit value being written.
//
// Unexpected register numbers are reported through DolphinAnalytics and the VIDEO log
// rather than rejected; see the individual cases for what is still stored.
void CPState::LoadCPReg(u8 sub_cmd, u32 value)
{
switch (sub_cmd & CP_COMMAND_MASK)
{
// These groups store nothing; anything other than (UNKNOWN_20, value 0) is reported.
case UNKNOWN_00:
case UNKNOWN_10:
case UNKNOWN_20:
if (!(sub_cmd == UNKNOWN_20 && value == 0))
{
// All titles using libogc or the official SDK issue 0x20 with value=0 on startup
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
sub_cmd);
}
break;
// For the four single-register groups below, a sub_cmd that is in the group but not
// exactly the expected register is reported, and the value is stored regardless.
case MATINDEX_A:
if (sub_cmd != MATINDEX_A)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_A: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_A, sub_cmd);
}
matrix_index_a.Hex = value;
break;
case MATINDEX_B:
if (sub_cmd != MATINDEX_B)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_B: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_B, sub_cmd);
}
matrix_index_b.Hex = value;
break;
case VCD_LO:
if (sub_cmd != VCD_LO) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_LO: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_LO, sub_cmd);
}
vtx_desc.low.Hex = value;
break;
case VCD_HI:
if (sub_cmd != VCD_HI) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_HI: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_HI, sub_cmd);
}
vtx_desc.high.Hex = value;
break;
// For the three VAT groups, an offset of CP_NUM_VAT_REG or more within the group is
// reported; the write is still applied to the VAT selected by (sub_cmd & CP_VAT_MASK).
case CP_VAT_REG_A:
if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
break;
case CP_VAT_REG_B:
if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
break;
case CP_VAT_REG_C:
if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
break;
// Pointers to vertex arrays in GC RAM
// The array is selected by (sub_cmd & CP_ARRAY_MASK); the stored address is masked
// with CommandProcessor::GetPhysicalAddressMask().
case ARRAY_BASE:
array_bases[static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK)] =
value & CommandProcessor::GetPhysicalAddressMask();
break;
// Only the low 8 bits of the written value are kept as the stride.
case ARRAY_STRIDE:
array_strides[static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF;
break;
// Any other register group is reported and otherwise ignored.
default:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
}
}
// Writes this CP state into a flat register image indexed by CP register number.
// Only the registers tracked by CPState are written; every other entry of `memory`
// is left untouched, so the caller decides what those contain.
void CPState::FillCPMemoryArray(u32* memory) const
{
  // Single-register values.
  memory[VCD_LO] = vtx_desc.low.Hex;
  memory[VCD_HI] = vtx_desc.high.Hex;
  memory[MATINDEX_A] = matrix_index_a.Hex;
  memory[MATINDEX_B] = matrix_index_b.Hex;

  // Each vertex attribute table is spread over three register groups (A, B and C).
  for (int vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    const auto& attr = vtx_attr[vat];
    memory[CP_VAT_REG_A + vat] = attr.g0.Hex;
    memory[CP_VAT_REG_B + vat] = attr.g1.Hex;
    memory[CP_VAT_REG_C + vat] = attr.g2.Hex;
  }

  // Array base addresses and strides, one pair per CPArray slot.
  for (int slot = 0; slot < CP_NUM_ARRAYS; ++slot)
  {
    const auto array = static_cast<CPArray>(slot);
    memory[ARRAY_BASE + slot] = array_bases[array];
    memory[ARRAY_STRIDE + slot] = array_strides[array];
  }
}

View File

@ -5,12 +5,14 @@
#include <array>
#include <string>
#include <type_traits>
#include <utility>
#include "Common/BitField.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h"
enum
@ -53,24 +55,46 @@ enum
};
// Vertex array numbers
enum
enum class CPArray : u8
{
ARRAY_POSITION = 0,
ARRAY_NORMAL = 1,
ARRAY_COLOR0 = 2,
NUM_COLOR_ARRAYS = 2,
ARRAY_TEXCOORD0 = 4,
NUM_TEXCOORD_ARRAYS = 8,
Position = 0,
Normal = 1,
ARRAY_XF_A = 12, // Usually used for position matrices
ARRAY_XF_B = 13, // Usually used for normal matrices
ARRAY_XF_C = 14, // Usually used for tex coord matrices
ARRAY_XF_D = 15, // Usually used for light objects
Color0 = 2,
Color1 = 3,
// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
NUM_VERTEX_COMPONENT_ARRAYS = 12,
TexCoord0 = 4,
TexCoord1 = 5,
TexCoord2 = 6,
TexCoord3 = 7,
TexCoord4 = 8,
TexCoord5 = 9,
TexCoord6 = 10,
TexCoord7 = 11,
XF_A = 12, // Usually used for position matrices
XF_B = 13, // Usually used for normal matrices
XF_C = 14, // Usually used for tex coord matrices
XF_D = 15, // Usually used for light objects
};
// fmt support for CPArray: supplies the display name for each enumerator.
template <>
struct fmt::formatter<CPArray> : EnumFormatter<CPArray::XF_D>
{
  // One entry per CPArray value, in numeric order.
  static constexpr array_type names = {
      // Vertex position and normal
      "Position",
      "Normal",
      // Vertex colors
      "Color 0",
      "Color 1",
      // Texture coordinates
      "Tex Coord 0",
      "Tex Coord 1",
      "Tex Coord 2",
      "Tex Coord 3",
      "Tex Coord 4",
      "Tex Coord 5",
      "Tex Coord 6",
      "Tex Coord 7",
      // Arrays used for indexed XF loads
      "XF A",
      "XF B",
      "XF C",
      "XF D",
  };

  formatter() : EnumFormatter(names) {}
};
// Returns the array `offset` slots after `array`.
// Meant for stepping from a family's first member (Color0 / TexCoord0) to a later one;
// no range check is performed on the result.
constexpr CPArray operator+(CPArray array, u8 offset)
{
  const u8 base = static_cast<u8>(array);
  return static_cast<CPArray>(base + offset);
}
// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12;
// Vertex components
enum class VertexComponentFormat
@ -607,32 +631,29 @@ class VertexLoaderBase;
// STATE_TO_SAVE
struct CPState final
{
u32 array_bases[CP_NUM_ARRAYS]{};
u32 array_strides[CP_NUM_ARRAYS]{};
CPState() = default;
explicit CPState(const u32* memory);
// Mutates the CP state based on the given command and value.
void LoadCPReg(u8 sub_cmd, u32 value);
// Fills memory with data from CP regs. There should be space for 0x100 values in memory.
void FillCPMemoryArray(u32* memory) const;
Common::EnumMap<u32, CPArray::XF_D> array_bases;
Common::EnumMap<u32, CPArray::XF_D> array_strides;
TMatrixIndexA matrix_index_a{};
TMatrixIndexB matrix_index_b{};
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[CP_NUM_VAT_REG]{};
// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty{};
bool bases_dirty = false;
VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{};
int last_id = 0;
std::array<VAT, CP_NUM_VAT_REG> vtx_attr{};
};
static_assert(std::is_trivially_copyable_v<CPState>);
class PointerWrap;
extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state;
// Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
// Fills memory with data from CP regs
void FillCPMemoryArray(u32* memory);
void DoCPState(PointerWrap& p);
void CopyPreprocessCPStateFromMain();

View File

@ -5,6 +5,7 @@
#include <atomic>
#include <cstring>
#include <fmt/format.h>
#include "Common/Assert.h"
#include "Common/ChunkFile.h"
@ -607,10 +608,10 @@ void SetCpClearRegister()
{
}
void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess)
{
// TODO(Omega): Maybe dump FIFO to file on this error
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n"
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n"
"This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n"
@ -618,7 +619,7 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
"* Some other sort of bug\n\n"
"Further errors will be sent to the Video Backend log and\n"
"Dolphin will now likely crash or hang. Enjoy.",
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
cmd_byte, fmt::ptr(buffer), preprocess);
{
PanicAlertFmt("Illegal command {:02x}\n"

View File

@ -169,7 +169,7 @@ void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();
void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess);
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);
u32 GetPhysicalAddressMask();

View File

@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
}
}
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
@ -316,7 +316,7 @@ void RunGpuLoop()
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr =
OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr);
s_video_buffer_seen_ptr = write_ptr;
}
}
@ -349,8 +349,8 @@ void RunGpuLoop()
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted);
fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks)
}
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles);
available_ticks -= cycles;
}

View File

@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines()
{
PortableVertexDeclaration vtx_decl = {};
vtx_decl.position.enable = true;
vtx_decl.position.type = VAR_FLOAT;
vtx_decl.position.type = ComponentFormat::Float;
vtx_decl.position.components = 4;
vtx_decl.position.integer = false;
vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
vtx_decl.colors[0].enable = true;
vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE;
vtx_decl.colors[0].type = ComponentFormat::UByte;
vtx_decl.colors[0].components = 4;
vtx_decl.colors[0].integer = false;
vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);

View File

@ -6,25 +6,29 @@
#include <cmath>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
constexpr std::array<const char*, 4> primitives_ogl{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_ogl{
"points",
"lines",
"triangles",
"triangles",
};
constexpr std::array<const char*, 4> primitives_d3d{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_d3d{
"point",
"line",
"triangle",
"triangle",
};
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_in_map{1u, 2u, 3u, 3u};
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_out_map{4u, 4u, 4u, 3u};
bool geometry_shader_uid_data::IsPassthrough() const
{
const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off;
@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo;
const auto primitive_type = static_cast<PrimitiveType>(uid_data->primitive_type);
const auto primitive_type_index = static_cast<unsigned>(uid_data->primitive_type);
const auto vertex_in = std::min(static_cast<unsigned>(primitive_type_index) + 1, 3u);
u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 3 : 4;
const u32 vertex_in = vertex_in_map[primitive_type];
u32 vertex_out = vertex_out_map[primitive_type];
if (wireframe)
vertex_out++;
@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
// Insert layout parameters
if (host_config.backend_gs_instancing)
{
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type],
stereo ? 2 : 1);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
}
else
{
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.Write("layout({}) in;\n", primitives_ogl[primitive_type]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
}
@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}
else
{
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}
out.Write("\tVertexData ps;\n");

View File

@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)
void IndexGenerator::Init()
{
using OpcodeDecoder::Primitive;
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
}
else
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
}
m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList;
m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints;
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
}
void IndexGenerator::Start(u16* index_ptr)
@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr)
m_base_index = 0;
}
void IndexGenerator::AddIndices(int primitive, u32 num_vertices)
void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
m_index_buffer_current =
m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index);

View File

@ -6,8 +6,9 @@
#pragma once
#include <array>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/OpcodeDecoding.h"
class IndexGenerator
{
@ -15,7 +16,7 @@ public:
void Init();
void Start(u16* index_ptr);
void AddIndices(int primitive, u32 num_vertices);
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices);
@ -30,5 +31,5 @@ private:
u32 m_base_index = 0;
using PrimitiveFunction = u16* (*)(u16*, u32, u32);
std::array<PrimitiveFunction, 8> m_primitive_table{};
Common::EnumMap<PrimitiveFunction, OpcodeDecoder::Primitive::GX_DRAW_POINTS> m_primitive_table{};
};

View File

@ -8,6 +8,7 @@
#include "Common/CommonTypes.h"
#include "Common/Hash.h"
#include "VideoCommon/CPMemory.h"
// m_components
enum
@ -45,18 +46,9 @@ enum
VB_HAS_UVTEXMTXSHIFT = 13,
};
enum VarType
{
VAR_UNSIGNED_BYTE, // GX_U8 = 0
VAR_BYTE, // GX_S8 = 1
VAR_UNSIGNED_SHORT, // GX_U16 = 2
VAR_SHORT, // GX_S16 = 3
VAR_FLOAT, // GX_F32 = 4
};
struct AttributeFormat
{
VarType type;
ComponentFormat type;
int components;
int offset;
bool enable;

View File

@ -14,7 +14,7 @@
#include "VideoCommon/OpcodeDecoding.h"
#include "Common/CommonTypes.h"
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
@ -24,55 +24,15 @@
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/XFMemory.h"
#include "VideoCommon/XFStructs.h"
namespace OpcodeDecoder
{
namespace
{
bool s_is_fifo_error_seen = false;
u32 InterpretDisplayList(u32 address, u32 size)
{
u8* start_address;
if (Fifo::UseDeterministicGPUThread())
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
else
start_address = Memory::GetPointer(address);
u32 cycles = 0;
// Avoid the crash if Memory::GetPointer failed ..
if (start_address != nullptr)
{
// temporarily swap dl and non-dl (small "hack" for the stats)
g_stats.SwapDL();
Run(DataReader(start_address, start_address + size), &cycles, true);
INCSTAT(g_stats.this_frame.num_dlists_called);
// un-swap
g_stats.SwapDL();
}
return cycles;
}
void InterpretDisplayListPreprocess(u32 address, u32 size)
{
u8* const start_address = Memory::GetPointer(address);
Fifo::PushFifoAuxBuffer(start_address, size);
if (start_address == nullptr)
return;
Run<true>(DataReader(start_address, start_address + size), nullptr, true);
}
} // Anonymous namespace
static bool s_is_fifo_error_seen = false;
bool g_record_fifo_data = false;
void Init()
@ -81,202 +41,228 @@ void Init()
}
template <bool is_preprocess>
u8* Run(DataReader src, u32* cycles, bool in_display_list)
class RunCallback final : public Callback
{
u32 total_cycles = 0;
u8* opcode_start = nullptr;
const auto finish_up = [cycles, &opcode_start, &total_cycles] {
if (cycles != nullptr)
{
*cycles = total_cycles;
}
return opcode_start;
};
while (true)
public:
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
{
opcode_start = src.GetPointer();
m_cycles += 18 + 6 * count;
if (!src.size())
return finish_up();
const u8 cmd_byte = src.Read<u8>();
switch (cmd_byte)
{
case GX_NOP:
total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
break;
case GX_UNKNOWN_RESET:
total_cycles += 6; // Datel software uses this command
DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte);
break;
case GX_LOAD_CP_REG:
{
if (src.size() < 1 + 4)
return finish_up();
total_cycles += 12;
const u8 sub_cmd = src.Read<u8>();
const u32 value = src.Read<u32>();
LoadCPReg(sub_cmd, value, is_preprocess);
if constexpr (!is_preprocess)
INCSTAT(g_stats.this_frame.num_cp_loads);
}
break;
case GX_LOAD_XF_REG:
{
if (src.size() < 4)
return finish_up();
const u32 cmd2 = src.Read<u32>();
const u32 transfer_size = ((cmd2 >> 16) & 15) + 1;
if (src.size() < transfer_size * sizeof(u32))
return finish_up();
total_cycles += 18 + 6 * transfer_size;
if constexpr (!is_preprocess)
{
const u32 xf_address = cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address, src);
INCSTAT(g_stats.this_frame.num_xf_loads);
}
src.Skip<u32>(transfer_size);
}
break;
case GX_LOAD_INDX_A: // Used for position matrices
case GX_LOAD_INDX_B: // Used for normal matrices
case GX_LOAD_INDX_C: // Used for postmatrices
case GX_LOAD_INDX_D: // Used for lights
{
if (src.size() < 4)
return finish_up();
total_cycles += 6;
// Map the command byte to its ref array.
// GX_LOAD_INDX_A (32) -> 0xC
// GX_LOAD_INDX_B (40) -> 0xD
// GX_LOAD_INDX_C (48) -> 0xE
// GX_LOAD_INDX_D (56) -> 0xF
const int ref_array = (cmd_byte / 8) + 8;
if constexpr (is_preprocess)
PreprocessIndexedXF(src.Read<u32>(), ref_array);
else
LoadIndexedXF(src.Read<u32>(), ref_array);
}
break;
case GX_CMD_CALL_DL:
{
if (src.size() < 8)
return finish_up();
const u32 address = src.Read<u32>();
const u32 count = src.Read<u32>();
if (in_display_list)
{
total_cycles += 6;
INFO_LOG_FMT(VIDEO, "recursive display list detected");
}
else
{
if constexpr (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
total_cycles += 6 + InterpretDisplayList(address, count);
}
}
break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after
// that
total_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte);
break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
total_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
break;
case GX_LOAD_BP_REG:
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{
if (src.size() < 4)
return finish_up();
total_cycles += 12;
const u32 bp_cmd = src.Read<u32>();
if constexpr (is_preprocess)
{
LoadBPRegPreprocess(bp_cmd, total_cycles);
}
else
{
LoadBPReg(bp_cmd, total_cycles);
INCSTAT(g_stats.this_frame.num_bp_loads);
}
}
break;
// draw primitives
default:
if ((cmd_byte & 0xC0) == 0x80)
{
// load vertices
if (src.size() < 2)
return finish_up();
const u16 num_vertices = src.Read<u16>();
const int bytes = VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src, is_preprocess);
if (bytes < 0)
return finish_up();
src.Skip(bytes);
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
total_cycles += num_vertices * 4 * 3 + 6;
}
else
{
if (!s_is_fifo_error_seen)
CommandProcessor::HandleUnknownOpcode(cmd_byte, opcode_start, is_preprocess);
ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", cmd_byte,
fmt::ptr(opcode_start), is_preprocess ? "yes" : "no");
s_is_fifo_error_seen = true;
total_cycles += 1;
}
break;
}
// Display lists get added directly into the FIFO stream
if constexpr (!is_preprocess)
{
if (g_record_fifo_data && cmd_byte != GX_CMD_CALL_DL)
LoadXFReg(address, count, data);
INCSTAT(g_stats.this_frame.num_xf_loads);
}
}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
m_cycles += 12;
const u8 sub_command = command & CP_COMMAND_MASK;
if constexpr (!is_preprocess)
{
if (sub_command == MATINDEX_A)
VertexShaderManager::SetTexMatrixChangedA(value);
else if (sub_command == MATINDEX_B)
VertexShaderManager::SetTexMatrixChangedB(value);
else if (sub_command == VCD_LO || sub_command == VCD_HI)
{
const u8* const opcode_end = src.GetPointer();
FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start));
VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
VertexLoaderManager::g_bases_dirty = true;
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true;
}
else if (sub_command == ARRAY_BASE)
{
VertexLoaderManager::g_bases_dirty = true;
}
INCSTAT(g_stats.this_frame.num_cp_loads);
}
else if constexpr (is_preprocess)
{
if (sub_command == VCD_LO || sub_command == VCD_HI)
{
VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true;
}
}
GetCPState().LoadCPReg(command, value);
}
// Handles a BP register write: charges 12 cycles, then forwards the write to the BP loader that
// matches the current mode.  Only the non-preprocess path counts towards the BP load statistic.
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
  m_cycles += 12;

  if constexpr (!is_preprocess)
  {
    LoadBPReg(command, value, m_cycles);
    INCSTAT(g_stats.this_frame.num_bp_loads);
  }
  else
  {
    LoadBPRegPreprocess(command, value, m_cycles);
  }
}
// Handles an indexed XF load: charges 6 cycles and hands the request to either the real loader or
// its preprocess counterpart.
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
  m_cycles += 6;

  if constexpr (!is_preprocess)
  {
    LoadIndexedXF(array, index, address, size);
  }
  else
  {
    PreprocessIndexedXF(array, index, address, size);
  }
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
// load vertices
const u32 size = vertex_size * num_vertices;
// HACK
DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size};
const u32 bytes =
VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess);
ASSERT(bytes == size);
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
m_cycles += num_vertices * 4 * 3 + 6;
}
// This can't be inlined since it calls Run, which makes it recursive
// m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit
// to inlining Run for the display list directly.
OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size))
{
m_cycles += 6;
if (m_in_display_list)
{
WARN_LOG_FMT(VIDEO, "recursive display list detected");
}
else
{
m_in_display_list = true;
if constexpr (is_preprocess)
{
const u8* const start_address = Memory::GetPointer(address);
Fifo::PushFifoAuxBuffer(start_address, size);
if (start_address != nullptr)
{
Run(start_address, size, *this);
}
}
else
{
const u8* start_address;
if (Fifo::UseDeterministicGPUThread())
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
else
start_address = Memory::GetPointer(address);
// Avoid the crash if Memory::GetPointer failed ..
if (start_address != nullptr)
{
// temporarily swap dl and non-dl (small "hack" for the stats)
g_stats.SwapDL();
Run(start_address, size, *this);
INCSTAT(g_stats.this_frame.num_dlists_called);
// un-swap
g_stats.SwapDL();
}
}
m_in_display_list = false;
}
}
OPCODE_CALLBACK(void OnNop(u32 count))
{
m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly...
}
// Handles every opcode the decoder has no dedicated handler for.  Three known-but-unimplemented
// opcodes are merely logged at debug level; anything else is reported as a FIFO error.
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
  switch (static_cast<Opcode>(opcode))
  {
  case Opcode::GX_UNKNOWN_RESET:
    // Datel software uses this command
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "GX Reset?");
    break;

  case Opcode::GX_CMD_UNKNOWN_METRICS:
    // 'Zelda Four Swords' calls it and checks the metrics registers after that
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "GX 0x44");
    break;

  case Opcode::GX_CMD_INVL_VC:
    // Invalidate Vertex Cache
    m_cycles += 6;
    DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
    break;

  default:
    // The alert is raised only for the first unknown opcode; later ones are just logged.
    if (!s_is_fifo_error_seen)
      CommandProcessor::HandleUnknownOpcode(opcode, data, is_preprocess);
    ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", opcode,
                  fmt::ptr(data), is_preprocess ? "yes" : "no");
    s_is_fifo_error_seen = true;
    m_cycles += 1;
    break;
  }
}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size))
{
ASSERT(size >= 1);
if constexpr (!is_preprocess)
{
// Display lists get added directly into the FIFO stream since this same callback is used to
// process them.
if (g_record_fifo_data && static_cast<Opcode>(data[0]) != Opcode::GX_CMD_CALL_DL)
{
FifoRecorder::GetInstance().WriteGPCommand(data, size);
}
}
}
// Returns the CP state this callback operates on: the preprocess copy when preprocessing,
// the main one otherwise.
OPCODE_CALLBACK(CPState& GetCPState())
{
  return is_preprocess ? g_preprocess_cp_state : g_main_cp_state;
}
u32 m_cycles = 0;
bool m_in_display_list = false;
};
// Decodes as many complete commands as `src` contains, using a fresh RunCallback.
// If `cycles` is non-null it receives the cycle count accumulated by the callback.
// Returns the read pointer of `src` after skipping the bytes that were decoded.
template <bool is_preprocess>
u8* RunFifo(DataReader src, u32* cycles)
{
  RunCallback<is_preprocess> callback{};

  const u32 consumed = Run(src.GetPointer(), static_cast<u32>(src.size()), callback);

  if (cycles != nullptr)
    *cycles = callback.m_cycles;

  src.Skip(consumed);
  return src.GetPointer();
}
template u8* Run<true>(DataReader src, u32* cycles, bool in_display_list);
template u8* Run<false>(DataReader src, u32* cycles, bool in_display_list);
template u8* RunFifo<true>(DataReader src, u32* cycles);
template u8* RunFifo<false>(DataReader src, u32* cycles);
} // namespace OpcodeDecoder

View File

@ -3,8 +3,17 @@
#pragma once
#include "Common/CommonTypes.h"
#include <type_traits>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/Inline.h"
#include "Common/Swap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexLoaderBase.h"
struct CPState;
class DataReader;
namespace OpcodeDecoder
@ -12,7 +21,7 @@ namespace OpcodeDecoder
// Global flag to signal if FifoRecorder is active.
extern bool g_record_fifo_data;
enum
enum class Opcode
{
GX_NOP = 0x00,
GX_UNKNOWN_RESET = 0x01,
@ -27,20 +36,20 @@ enum
GX_CMD_CALL_DL = 0x40,
GX_CMD_UNKNOWN_METRICS = 0x44,
GX_CMD_INVL_VC = 0x48
GX_CMD_INVL_VC = 0x48,
GX_PRIMITIVE_START = 0x80,
GX_PRIMITIVE_END = 0xbf,
};
enum
{
GX_PRIMITIVE_MASK = 0x78,
GX_PRIMITIVE_SHIFT = 3,
GX_VAT_MASK = 0x07
};
constexpr u8 GX_PRIMITIVE_MASK = 0x78;
constexpr u32 GX_PRIMITIVE_SHIFT = 3;
constexpr u8 GX_VAT_MASK = 0x07;
// These values are the values extracted using GX_PRIMITIVE_MASK
// and GX_PRIMITIVE_SHIFT.
// GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS.
enum
enum class Primitive : u8
{
GX_DRAW_QUADS = 0x0, // 0x80
GX_DRAW_QUADS_2 = 0x1, // 0x88
@ -54,7 +63,232 @@ enum
void Init();
// Interface for the Run and RunCommand functions below.
// The functions themselves are templates so that the compiler generates separate versions for each
// callback (with the callback functions inlined), so the callback doesn't actually need to be
// publicly inherited.
// Compilers don't generate warnings for failed inlining with virtual functions, so this define
// allows disabling the use of virtual functions to generate those warnings. However, this means
// that missing functions will generate errors on their use in RunCommand, instead of in the
// subclass, which can be confusing.
#define OPCODE_CALLBACK_USE_INHERITANCE
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override
#define OPCODE_CALLBACK_NOINLINE(sig) sig override
#else
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig
#define OPCODE_CALLBACK_NOINLINE(sig) sig
#endif
// Receiver interface for decoded GX FIFO commands.  RunCommand decodes one command from a byte
// buffer and invokes the matching function below.
// When OPCODE_CALLBACK_USE_INHERITANCE is not defined this class has no members at all and only
// serves as a tag base for the std::is_base_of check on the RunCommand/Run templates.
class Callback
{
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
public:
virtual ~Callback() = default;
// Called on any XF command.
// address is the low 16 bits of the command's header word, count is the number of 32-bit
// values being transferred (1 to 16), and data points to the first of those values.
virtual void OnXF(u16 address, u8 count, const u8* data) = 0;
// Called on any CP command.
// Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that
// primitive commands decode properly.
// command is the byte following the opcode; value is the 32-bit value that follows it.
virtual void OnCP(u8 command, u32 value) = 0;
// Called on any BP command.
// command is the byte following the opcode; value is the 24-bit value that follows it.
virtual void OnBP(u8 command, u32 value) = 0;
// Called on any indexed XF load command.
// array is one of CPArray::XF_A to CPArray::XF_D depending on the opcode.  The other arguments
// are fields of the command's 32-bit operand: index is its upper 16 bits, address its lowest
// 12 bits, and size is the 4-bit field in between plus one.
virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0;
// Called on any primitive command.
// vertex_size is the per-vertex size computed from the current CP state (see GetCPState), and
// vertex_data points to num_vertices * vertex_size bytes of vertex data.
virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size,
u16 num_vertices, const u8* vertex_data) = 0;
// Called on a display list.
// address and size are the two 32-bit operands of the command.  The decoder does not read the
// list's contents itself; that is left to the implementation.
virtual void OnDisplayList(u32 address, u32 size) = 0;
// Called on any NOP commands (which are all merged into a single call).
// count is the number of consecutive NOP bytes that were merged.
virtual void OnNop(u32 count) = 0;
// Called on an unknown opcode, or an opcode that is known but not implemented.
// data[0] is opcode.
virtual void OnUnknown(u8 opcode, const u8* data) = 0;
// Called on ANY command. The first byte of data is the opcode. Size will be at least 1.
// This function is called after one of the above functions is called.
// It is not called when the buffer is too short to hold the complete command.
virtual void OnCommand(const u8* data, u32 size) = 0;
// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0;
#endif
};
namespace detail
{
// Main logic; split so that the main RunCommand can call OnCommand with the returned size.
// Decodes the command starting at data[0] and forwards it to the matching callback handler.
// Returns the number of bytes the command occupies, or 0 when `available` is too small to hold
// the complete command.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
  if (available < 1)
    return 0;

  const Opcode cmd = static_cast<Opcode>(data[0]);

  switch (cmd)
  {
  case Opcode::GX_NOP:
  {
    // Count the whole run of consecutive NOP bytes so that it is reported in a single call.
    u32 count = 1;
    for (; count < available; count++)
    {
      if (static_cast<Opcode>(data[count]) != Opcode::GX_NOP)
        break;
    }
    callback.OnNop(count);
    return count;
  }

  case Opcode::GX_LOAD_CP_REG:
  {
    // Opcode byte, register byte and a 32-bit value.
    constexpr u32 command_size = 6;
    if (available < command_size)
      return 0;

    const u8 cmd2 = data[1];
    const u32 value = Common::swap32(&data[2]);

    callback.OnCP(cmd2, value);
    return command_size;
  }

  case Opcode::GX_LOAD_XF_REG:
  {
    // Opcode byte and a 32-bit header; the payload follows.
    constexpr u32 header_size = 5;
    if (available < header_size)
      return 0;

    const u32 cmd2 = Common::swap32(&data[1]);
    const u16 base_address = cmd2 & 0xffff;

    // The header stores the number of 32-bit values minus one.
    const u16 stream_size_temp = cmd2 >> 16;
    ASSERT(stream_size_temp < 16);
    const u8 stream_size = (stream_size_temp & 0xf) + 1;

    const u32 command_size = u32(5 + stream_size * 4);
    if (available < command_size)
      return 0;

    callback.OnXF(base_address, stream_size, &data[header_size]);
    return command_size;
  }

  case Opcode::GX_LOAD_INDX_A:  // Used for position matrices
  case Opcode::GX_LOAD_INDX_B:  // Used for normal matrices
  case Opcode::GX_LOAD_INDX_C:  // Used for postmatrices
  case Opcode::GX_LOAD_INDX_D:  // Used for lights
  {
    // Opcode byte and a 32-bit operand.
    constexpr u32 command_size = 5;
    if (available < command_size)
      return 0;

    const u32 value = Common::swap32(&data[1]);
    const u32 index = value >> 16;
    const u16 address = value & 0xFFF;  // TODO: check mask
    const u8 size = ((value >> 12) & 0xF) + 1;

    // Map the command byte to its ref array.
    // GX_LOAD_INDX_A (32 = 8*4) -> CPArray::XF_A (4+8 = 12)
    // GX_LOAD_INDX_B (40 = 8*5) -> CPArray::XF_B (5+8 = 13)
    // GX_LOAD_INDX_C (48 = 8*6) -> CPArray::XF_C (6+8 = 14)
    // GX_LOAD_INDX_D (56 = 8*7) -> CPArray::XF_D (7+8 = 15)
    const u8 array_number = (static_cast<u8>(cmd) / 8) + 8;
    const auto ref_array = static_cast<CPArray>(array_number);

    callback.OnIndexedLoad(ref_array, index, address, size);
    return command_size;
  }

  case Opcode::GX_CMD_CALL_DL:
  {
    // Opcode byte and two 32-bit operands.
    constexpr u32 command_size = 9;
    if (available < command_size)
      return 0;

    const u32 address = Common::swap32(&data[1]);
    const u32 size = Common::swap32(&data[5]);

    callback.OnDisplayList(address, size);
    return command_size;
  }

  case Opcode::GX_LOAD_BP_REG:
  {
    // Opcode byte, register byte and a 24-bit value.
    constexpr u32 command_size = 5;
    if (available < command_size)
      return 0;

    const u8 cmd2 = data[1];
    const u32 value = Common::swap24(&data[2]);

    callback.OnBP(cmd2, value);
    return command_size;
  }

  default:
  {
    // Anything outside the primitive range is reported as unknown below.
    if (cmd < Opcode::GX_PRIMITIVE_START || cmd > Opcode::GX_PRIMITIVE_END)
      break;

    // Opcode byte and a 16-bit vertex count; the vertex data follows.
    if (available < 3)
      return 0;

    // The opcode byte encodes both the primitive type and the vertex attribute table index.
    const u8 cmdbyte = static_cast<u8>(cmd);
    const OpcodeDecoder::Primitive primitive = static_cast<OpcodeDecoder::Primitive>(
        (cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
    const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;

    // The size of one vertex depends on the current CP state.
    const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
                                                            callback.GetCPState().vtx_attr[vat]);
    const u16 num_vertices = Common::swap16(&data[1]);

    const u32 command_size = 3 + num_vertices * vertex_size;
    if (available < command_size)
      return 0;

    callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[3]);
    return command_size;
  }
  }

  // Unknown opcodes are treated as single-byte commands.
  callback.OnUnknown(static_cast<u8>(cmd), data);
  return 1;
}
} // namespace detail
// Decodes a single command from `data` and notifies the callback about it.
// Returns the number of bytes the command occupies, or 0 if no complete command was available;
// in that case OnCommand is not called.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
  const u32 command_size = detail::RunCommand(data, available, callback);
  if (command_size == 0)
    return 0;

  callback.OnCommand(data, command_size);
  return command_size;
}
// Decodes commands from `data` one after another until the buffer is exhausted or the remaining
// bytes do not form a complete command.
// Returns the total number of bytes that were decoded.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback)
{
  u32 offset = 0;

  while (offset < available)
  {
    const u32 consumed = RunCommand(data + offset, available - offset, callback);

    // Zero means the rest of the buffer holds only part of a command.
    if (consumed == 0)
      return offset;

    offset += consumed;
  }

  return offset;
}
template <bool is_preprocess = false>
u8* Run(DataReader src, u32* cycles, bool in_display_list);
u8* RunFifo(DataReader src, u32* cycles);
} // namespace OpcodeDecoder
// fmt support for OpcodeDecoder::Primitive: maps each primitive type to its enumerator name.
template <>
struct fmt::formatter<OpcodeDecoder::Primitive>
    : EnumFormatter<OpcodeDecoder::Primitive::GX_DRAW_POINTS>
{
  // One entry per Primitive value, up to and including GX_DRAW_POINTS.
  static constexpr array_type names = {
      "GX_DRAW_QUADS",
      "GX_DRAW_QUADS_2 (nonstandard)",
      "GX_DRAW_TRIANGLES",
      "GX_DRAW_TRIANGLE_STRIP",
      "GX_DRAW_TRIANGLE_FAN",
      "GX_DRAW_LINES",
      "GX_DRAW_LINE_STRIP",
      "GX_DRAW_POINTS",
  };

  formatter() : EnumFormatter(names) {}
};

View File

@ -8,6 +8,7 @@
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h"
@ -40,7 +41,7 @@ enum : u32
C_PENVCONST_END = C_EFBSCALE + 1
};
constexpr std::array<const char*, 32> tev_ksel_table_c{
constexpr Common::EnumMap<const char*, KonstSel::K3_A> tev_ksel_table_c{
"255,255,255", // 1 = 0x00
"223,223,223", // 7_8 = 0x01
"191,191,191", // 3_4 = 0x02
@ -75,7 +76,7 @@ constexpr std::array<const char*, 32> tev_ksel_table_c{
I_KCOLORS "[3].aaa", // K3_A = 0x1F
};
constexpr std::array<const char*, 32> tev_ksel_table_a{
constexpr Common::EnumMap<const char*, KonstSel::K3_A> tev_ksel_table_a{
"255", // 1 = 0x00
"223", // 7_8 = 0x01
"191", // 3_4 = 0x02
@ -110,7 +111,7 @@ constexpr std::array<const char*, 32> tev_ksel_table_a{
I_KCOLORS "[3].a", // K3_A = 0x1F
};
constexpr std::array<const char*, 16> tev_c_input_table{
constexpr Common::EnumMap<const char*, TevColorArg::Zero> tev_c_input_table{
"prev.rgb", // CPREV,
"prev.aaa", // APREV,
"c0.rgb", // C0,
@ -129,7 +130,7 @@ constexpr std::array<const char*, 16> tev_c_input_table{
"int3(0,0,0)", // ZERO
};
constexpr std::array<const char*, 8> tev_a_input_table{
constexpr Common::EnumMap<const char*, TevAlphaArg::Zero> tev_a_input_table{
"prev.a", // APREV,
"c0.a", // A0,
"c1.a", // A1,
@ -140,7 +141,7 @@ constexpr std::array<const char*, 8> tev_a_input_table{
"0", // ZERO
};
constexpr std::array<const char*, 8> tev_ras_table{
constexpr Common::EnumMap<const char*, RasColorChan::Zero> tev_ras_table{
"iround(col0 * 255.0)",
"iround(col1 * 255.0)",
"ERROR13", // 2
@ -151,14 +152,14 @@ constexpr std::array<const char*, 8> tev_ras_table{
"int4(0, 0, 0, 0)", // zero
};
constexpr std::array<const char*, 4> tev_c_output_table{
constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_c_output_table{
"prev.rgb",
"c0.rgb",
"c1.rgb",
"c2.rgb",
};
constexpr std::array<const char*, 4> tev_a_output_table{
constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_a_output_table{
"prev.a",
"c0.a",
"c1.a",
@ -1160,11 +1161,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac;
if (last_cc.dest != TevOutput::Prev)
{
out.Write("\tprev.rgb = {};\n", tev_c_output_table[u32(last_cc.dest.Value())]);
out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
}
if (last_ac.dest != TevOutput::Prev)
{
out.Write("\tprev.a = {};\n", tev_a_output_table[u32(last_ac.dest.Value())]);
out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
}
}
out.Write("\tprev = prev & 255;\n");
@ -1277,6 +1278,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
APIType api_type, bool stereo)
{
using Common::EnumMap;
const auto& stage = uid_data->stagehash[n];
out.Write("\n\t// TEV stage {}\n", n);
@ -1303,7 +1306,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// using iindtex{} as the offset coords
if (has_ind_stage && tevind.bs != IndTexBumpAlpha::Off)
{
static constexpr std::array<const char*, 4> tev_ind_alpha_sel{
static constexpr EnumMap<const char*, IndTexBumpAlpha::U> tev_ind_alpha_sel{
"",
"x",
"y",
@ -1316,16 +1319,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L3038-L3041
// https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L790-L800
static constexpr std::array<char, 4> tev_ind_alpha_shift{
static constexpr EnumMap<char, IndTexFormat::ITF_3> tev_ind_alpha_shift{
'0', // ITF_8: 0bXXXXXYYY -> 0bXXXXX000? No shift?
'5', // ITF_5: 0bIIIIIAAA -> 0bAAA00000, shift of 5
'4', // ITF_4: 0bIIIIAAAA -> 0bAAAA0000, shift of 4
'3', // ITF_3: 0bIIIAAAAA -> 0bAAAAA000, shift of 3
};
out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt.Value(),
tev_ind_alpha_sel[u32(tevind.bs.Value())],
tev_ind_alpha_shift[u32(tevind.fmt.Value())]);
out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt,
tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_shift[tevind.fmt]);
}
else
{
@ -1335,23 +1337,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (has_ind_stage && tevind.matrix_index != IndMtxIndex::Off)
{
// format
static constexpr std::array<char, 4> tev_ind_fmt_shift{
static constexpr EnumMap<char, IndTexFormat::ITF_3> tev_ind_fmt_shift{
'0', // ITF_8: 0bXXXXXXXX -> 0bXXXXXXXX, no shift
'3', // ITF_5: 0bIIIIIAAA -> 0b000IIIII, shift of 3
'4', // ITF_4: 0bIIIIAAAA -> 0b0000IIII, shift of 4
'5', // ITF_3: 0bIIIAAAAA -> 0b00000III, shift of 5
};
out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt.Value(),
tev_ind_fmt_shift[u32(tevind.fmt.Value())]);
out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt,
tev_ind_fmt_shift[tevind.fmt]);
// bias - TODO: Check if this needs to be this complicated...
// indexed by bias
static constexpr std::array<const char*, 8> tev_ind_bias_field{
static constexpr EnumMap<const char*, IndTexBias::STU> tev_ind_bias_field{
"", "x", "y", "xy", "z", "xz", "yz", "xyz",
};
// indexed by fmt
static constexpr std::array<const char*, 4> tev_ind_bias_add{
static constexpr EnumMap<const char*, IndTexFormat::ITF_3> tev_ind_bias_add{
"-128",
"1",
"1",
@ -1361,22 +1363,19 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (tevind.bias == IndTexBias::S || tevind.bias == IndTexBias::T ||
tevind.bias == IndTexBias::U)
{
out.Write("\tiindtevcrd{}.{} += int({});\n", n,
tev_ind_bias_field[u32(tevind.bias.Value())],
tev_ind_bias_add[u32(tevind.fmt.Value())]);
out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_add[tevind.fmt]);
}
else if (tevind.bias == IndTexBias::ST || tevind.bias == IndTexBias::SU ||
tevind.bias == IndTexBias::TU_)
{
out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n,
tev_ind_bias_field[u32(tevind.bias.Value())],
tev_ind_bias_add[u32(tevind.fmt.Value())]);
out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_add[tevind.fmt]);
}
else if (tevind.bias == IndTexBias::STU)
{
out.Write("\tiindtevcrd{0}.{1} += int3({2}, {2}, {2});\n", n,
tev_ind_bias_field[u32(tevind.bias.Value())],
tev_ind_bias_add[u32(tevind.fmt.Value())]);
tev_ind_bias_field[tevind.bias], tev_ind_bias_add[tevind.fmt]);
}
// Multiplied by 2 because each matrix has two rows.
@ -1535,7 +1534,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0',
};
out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap);
out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap);
}
if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0)
@ -1567,8 +1566,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
cc.d == TevColorArg::Konst || ac.a == TevAlphaArg::Konst || ac.b == TevAlphaArg::Konst ||
ac.c == TevAlphaArg::Konst || ac.d == TevAlphaArg::Konst)
{
out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[u32(stage.tevksel_kc)],
tev_ksel_table_a[u32(stage.tevksel_ka)]);
out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc],
tev_ksel_table_a[stage.tevksel_ka]);
if (u32(stage.tevksel_kc) > 7)
{
@ -1599,51 +1598,50 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VECTOR_BITWISE_AND))
{
out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.a.Value())],
tev_a_input_table[u32(ac.a.Value())]);
out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.b.Value())],
tev_a_input_table[u32(ac.b.Value())]);
out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.c.Value())],
tev_a_input_table[u32(ac.c.Value())]);
out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.a],
tev_a_input_table[ac.a]);
out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.b],
tev_a_input_table[ac.b]);
out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.c],
tev_a_input_table[ac.c]);
}
else
{
out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n",
tev_c_input_table[u32(cc.a.Value())], tev_a_input_table[u32(ac.a.Value())]);
out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n",
tev_c_input_table[u32(cc.b.Value())], tev_a_input_table[u32(ac.b.Value())]);
out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n",
tev_c_input_table[u32(cc.c.Value())], tev_a_input_table[u32(ac.c.Value())]);
out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a],
tev_a_input_table[ac.a]);
out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b],
tev_a_input_table[ac.b]);
out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c],
tev_a_input_table[ac.c]);
}
out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[u32(cc.d.Value())],
tev_a_input_table[u32(ac.d.Value())]);
out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]);
out.Write("\t// color combine\n");
out.Write("\t{} = clamp(", tev_c_output_table[u32(cc.dest.Value())]);
out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]);
if (cc.bias != TevBias::Compare)
{
WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false);
}
else
{
static constexpr std::array<const char*, 8> function_table{
"((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8, GT
"((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // R8, TevComparison::EQ
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : "
"int3(0,0,0))", // GR16, GT
"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : "
"int3(0,0,0))", // GR16, EQ
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : "
"int3(0,0,0))", // BGR24, GT
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : "
"int3(0,0,0))", // BGR24, EQ
"(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8, GT
"((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8, EQ
// Shader expressions for the color (rgb) half of a TEV compare operation that
// uses a greater-than comparison, indexed by TevCompareMode. The caller appends
// the selected expression after " tevin_d.rgb + " whenever the comparison is
// not TevComparison::EQ. Each entry evaluates to tevin_c.rgb (or a per-channel
// masked tevin_c.rgb for RGB8) when the comparison holds and to zero otherwise.
static constexpr EnumMap<const char*, TevCompareMode::RGB8> tev_rgb_comparison_gt{
"((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24
"(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8
};
const u32 mode = (u32(cc.compare_mode.Value()) << 1) | u32(cc.comparison.Value());
out.Write(" tevin_d.rgb + ");
out.Write("{}", function_table[mode]);
// Shader expressions for the color (rgb) half of a TEV compare operation that
// uses TevComparison::EQ, indexed by TevCompareMode. The caller appends the
// selected expression after " tevin_d.rgb + ".
//
// Every "comparison failed" arm spells the zero vector as int3(0,0,0), the same
// form used by the greater-than table and by the rest of the generated shader
// text, so the emitted code does not rely on single-argument vector
// construction.
static constexpr EnumMap<const char*, TevCompareMode::RGB8> tev_rgb_comparison_eq{
    "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8
    "((idot(tevin_a.rgb,comp16) == idot(tevin_b.rgb,comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16
    "((idot(tevin_a.rgb,comp24) == idot(tevin_b.rgb,comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24
    // Per-channel equality: 1 where the channels match, 0 elsewhere
    "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8
};
if (cc.comparison == TevComparison::EQ)
out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_eq[cc.compare_mode]);
else
out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_gt[cc.compare_mode]);
}
if (cc.clamp)
out.Write(", int3(0,0,0), int3(255,255,255))");
@ -1652,27 +1650,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write(";\n");
out.Write("\t// alpha combine\n");
out.Write("\t{} = clamp(", tev_a_output_table[u32(ac.dest.Value())]);
out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]);
if (ac.bias != TevBias::Compare)
{
WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true);
}
else
{
static constexpr std::array<const char*, 8> function_table{
"((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8, GT
"((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // R8, TevComparison::EQ
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, GT
"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, EQ
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, GT
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, EQ
"((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8, GT
"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // A8, EQ
// Shader expressions for the alpha half of a TEV compare operation that uses a
// greater-than comparison, indexed by TevCompareMode. The caller appends the
// selected expression after " tevin_d.a + " whenever the comparison is not
// TevComparison::EQ. Each entry yields tevin_c.a when the comparison holds and
// 0 otherwise.
// NOTE(review): the table is bounded by TevCompareMode::A8 while the color
// tables use RGB8 - presumably both name the same enum value; confirm.
static constexpr EnumMap<const char*, TevCompareMode::A8> tev_a_comparison_gt{
"((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8
"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16
"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24
"((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8
};
const u32 mode = (u32(ac.compare_mode.Value()) << 1) | u32(ac.comparison.Value());
out.Write(" tevin_d.a + ");
out.Write("{}", function_table[mode]);
// Shader expressions for the alpha half of a TEV compare operation that uses
// TevComparison::EQ, indexed by TevCompareMode. The caller appends the selected
// expression after " tevin_d.a + ". Each entry yields tevin_c.a when the two
// inputs compare equal and 0 otherwise.
static constexpr EnumMap<const char*, TevCompareMode::A8> tev_a_comparison_eq{
"((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8
"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16
"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24
"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)", // A8
};
if (ac.comparison == TevComparison::EQ)
out.Write(" tevin_d.a + {}", tev_a_comparison_eq[ac.compare_mode]);
else
out.Write(" tevin_d.a + {}", tev_a_comparison_gt[ac.compare_mode]);
}
if (ac.clamp)
out.Write(", 0, 255)");
@ -1685,36 +1687,33 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
bool clamp, TevScale scale, bool alpha)
{
static constexpr std::array<const char*, 4> tev_scale_table_left{
static constexpr Common::EnumMap<const char*, TevScale::Divide2> tev_scale_table_left{
"", // Scale1
" << 1", // Scale2
" << 2", // Scale4
"", // Divide2
};
static constexpr std::array<const char*, 4> tev_scale_table_right{
static constexpr Common::EnumMap<const char*, TevScale::Divide2> tev_scale_table_right{
"", // Scale1
"", // Scale2
"", // Scale4
" >> 1", // Divide2
};
// indexed by 2*op+(scale==Divide2)
static constexpr std::array<const char*, 4> tev_lerp_bias{
"",
static constexpr Common::EnumMap<const char*, TevOp::Sub> tev_lerp_bias{
" + 128",
"",
" + 127",
};
static constexpr std::array<const char*, 4> tev_bias_table{
static constexpr Common::EnumMap<const char*, TevBias::Compare> tev_bias_table{
"", // Zero,
" + 128", // AddHalf,
" - 128", // SubHalf,
"",
};
static constexpr std::array<char, 2> tev_op_table{
static constexpr Common::EnumMap<char, TevOp::Sub> tev_op_table{
'+', // TevOp::Add = 0,
'-', // TevOp::Sub = 1,
};
@ -1724,17 +1723,16 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
// - a rounding bias is added before dividing by 256
out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[u32(bias)],
tev_scale_table_left[u32(scale)]);
out.Write(" {} ", tev_op_table[u32(op)]);
out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)",
components, components, components, components, components,
tev_scale_table_left[u32(scale)],
tev_lerp_bias[2 * u32(op) + ((scale == TevScale::Divide2) == alpha)]);
out.Write("){}", tev_scale_table_right[u32(scale)]);
out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[scale]);
out.Write(" {} ", tev_op_table[op]);
out.Write("(((((tevin_a.{0}<<8) + "
"(tevin_b.{0}-tevin_a.{0})*(tevin_c.{0}+(tevin_c.{0}>>7))){1}){2})>>8)",
components, tev_scale_table_left[scale],
((scale == TevScale::Divide2) == alpha) ? tev_lerp_bias[op] : "");
out.Write("){}", tev_scale_table_right[scale]);
}
constexpr std::array<const char*, 8> tev_alpha_funcs_table{
constexpr Common::EnumMap<const char*, CompareMode::Always> tev_alpha_funcs_table{
"(false)", // CompareMode::Never
"(prev.a < {})", // CompareMode::Less
"(prev.a == {})", // CompareMode::Equal
@ -1745,7 +1743,7 @@ constexpr std::array<const char*, 8> tev_alpha_funcs_table{
"(true)" // CompareMode::Always
};
constexpr std::array<const char*, 4> tev_alpha_funclogic_table{
constexpr Common::EnumMap<const char*, AlphaTestOp::Xnor> tev_alpha_funclogic_table{
" && ", // and
" || ", // or
" != ", // xor
@ -1763,9 +1761,9 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
const auto write_alpha_func = [&out](CompareMode mode, std::string_view ref) {
const bool has_no_arguments = mode == CompareMode::Never || mode == CompareMode::Always;
if (has_no_arguments)
out.Write("{}", tev_alpha_funcs_table[u32(mode)]);
out.Write("{}", tev_alpha_funcs_table[mode]);
else
out.Write(tev_alpha_funcs_table[u32(mode)], ref);
out.Write(tev_alpha_funcs_table[mode], ref);
};
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
@ -1779,7 +1777,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
write_alpha_func(uid_data->alpha_test_comp0, alpha_ref[0]);
// Lookup the logic op
out.Write("{}", tev_alpha_funclogic_table[u32(uid_data->alpha_test_logic)]);
out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]);
// Lookup the second component from the alpha function table
write_alpha_func(uid_data->alpha_test_comp1, alpha_ref[1]);
@ -1809,7 +1807,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
out.Write("\t}}\n");
}
constexpr std::array<const char*, 8> tev_fog_funcs_table{
constexpr Common::EnumMap<const char*, FogType::BackwardsExpSq> tev_fog_funcs_table{
"", // No Fog
"", // ?
"", // Linear
@ -1866,7 +1864,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
if (uid_data->fog_fsel >= FogType::Exp)
{
out.Write("{}", tev_fog_funcs_table[u32(uid_data->fog_fsel)]);
out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]);
}
else
{
@ -1919,7 +1917,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
if (uid_data->blend_enable)
{
static constexpr std::array<const char*, 8> blend_src_factor{
using Common::EnumMap;
static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor{
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR
@ -1929,7 +1928,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static constexpr std::array<const char*, 8> blend_src_factor_alpha{
static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor_alpha{
"0.0;", // ZERO
"1.0;", // ONE
"initial_ocol0.a;", // DSTCLR
@ -1939,7 +1938,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
static constexpr std::array<const char*, 8> blend_dst_factor{
static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor{
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR
@ -1949,7 +1948,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static constexpr std::array<const char*, 8> blend_dst_factor_alpha{
static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor_alpha{
"0.0;", // ZERO
"1.0;", // ONE
"ocol0.a;", // SRCCLR
@ -1960,13 +1959,11 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write("\tfloat4 blend_src;\n");
out.Write("\tblend_src.rgb = {}\n", blend_src_factor[u32(uid_data->blend_src_factor)]);
out.Write("\tblend_src.a = {}\n",
blend_src_factor_alpha[u32(uid_data->blend_src_factor_alpha)]);
out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]);
out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]);
out.Write("\tfloat4 blend_dst;\n");
out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[u32(uid_data->blend_dst_factor)]);
out.Write("\tblend_dst.a = {}\n",
blend_dst_factor_alpha[u32(uid_data->blend_dst_factor_alpha)]);
out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]);
out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]);
out.Write("\tfloat4 blend_result;\n");
if (uid_data->blend_subtract)

View File

@ -963,7 +963,7 @@ void Renderer::RecordVideoMemory()
const u32* xfregs_ptr = reinterpret_cast<const u32*>(&xfmem) + FifoDataFile::XF_MEM_SIZE;
u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE;
FillCPMemoryArray(cpmem);
g_main_cp_state.FillCPMemoryArray(cpmem);
FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size,
texMem);
@ -986,9 +986,9 @@ bool Renderer::InitializeImGui()
ImGui::GetStyle().WindowRounding = 7.0f;
PortableVertexDeclaration vdecl = {};
vdecl.position = {VAR_FLOAT, 2, offsetof(ImDrawVert, pos), true, false};
vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false};
vdecl.position = {ComponentFormat::Float, 2, offsetof(ImDrawVert, pos), true, false};
vdecl.texcoords[0] = {ComponentFormat::Float, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {ComponentFormat::UByte, 4, offsetof(ImDrawVert, col), true, false};
vdecl.stride = sizeof(ImDrawVert);
m_imgui_vertex_format = CreateNativeVertexFormat(vdecl);
if (!m_imgui_vertex_format)

View File

@ -1095,7 +1095,7 @@ void ShaderCache::QueueUberShaderPipelines()
// All attributes will be enabled in GetUberVertexFormat.
PortableVertexDeclaration dummy_vertex_decl = {};
dummy_vertex_decl.position.components = 4;
dummy_vertex_decl.position.type = VAR_FLOAT;
dummy_vertex_decl.position.type = ComponentFormat::Float;
dummy_vertex_decl.position.enable = true;
dummy_vertex_decl.stride = sizeof(float) * 4;
NativeVertexFormat* dummy_vertex_format =

View File

@ -13,10 +13,11 @@
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"
enum class APIType;
#include "VideoCommon/VideoCommon.h"
/**
* Common interface for classes that need to go through the shader generation path
@ -210,6 +211,64 @@ std::string BitfieldExtract(std::string_view source)
static_cast<u32>(BitFieldT::NumBits()));
}
// Writes shader code that selects one snippet from `values` based on the runtime
// value of the shader expression `variable`.
//
// out      - shader text is appended here
// ApiType  - APIType::D3D produces a switch statement; every other API produces
//            a nested if/else tree that halves the candidate range at each level
// variable - shader-side expression being tested
// values   - one snippet per enum member, from 0 up to and including last_member
// indent   - number of spaces placed in front of the outermost emitted lines
// break_   - switch form only: emit a "break;" after each case body
//
// In the switch form an empty snippet produces a case label with no body line.
// In the if/else form each leaf line is the snippet followed by a comment holding
// the formatted enum value.
template <auto last_member, typename = decltype(last_member)>
void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
                 const Common::EnumMap<std::string_view, last_member>& values, int indent,
                 bool break_)
{
  // The defaulted second template parameter exists to avoid compile errors from ambiguity between
  // enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW
  // and https://godbolt.org/z/hz7Yqq1P5
  using enum_type = decltype(last_member);
  constexpr u32 last_index = static_cast<u32>(last_member);

  // In every format string below, {:{}} formats an empty string with a variable width, which is
  // how the indentation is produced.
  if (ApiType == APIType::D3D)
  {
    out.Write("{:{}}switch ({}) {{\n", "", indent, variable);
    for (u32 index = 0; index <= last_index; ++index)
    {
      const auto key = static_cast<enum_type>(index);
      const std::string_view body = values[key];
      // The :s specifier assumes an EnumFormatter exists for enum_type
      out.Write("{:{}}case {:s}:\n", "", indent, key);
      // Multi-line snippets are not re-indented, so they come out poorly aligned
      if (!body.empty())
        out.Write("{:{}} {}\n", "", indent, body);
      if (break_)
        out.Write("{:{}} break;\n", "", indent);
    }
    out.Write("{:{}}}}\n", "", indent);
    return;
  }

  // Recursively emit if/else statements covering low <= x < high. The lambda takes itself as its
  // first argument because a lambda cannot refer to its own name inside its initializer; the
  // explicit return type is required for the recursive call to compile.
  const auto emit_range = [&](const auto& self, u32 cur_indent, u32 low, u32 high) -> void {
    if (high != low + 1)
    {
      const u32 mid = low + ((high - low) / 2);
      out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid);
      self(self, cur_indent + 2, low, mid);
      out.Write("{:{}}}} else {{\n", "", cur_indent);
      self(self, cur_indent + 2, mid, high);
      out.Write("{:{}}}}\n", "", cur_indent);
      return;
    }

    // A single candidate remains (low <= x < low + 1 means x == low).
    // Multi-line snippets are not re-indented here either.
    const auto key = static_cast<enum_type>(low);
    out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key);
  };
  emit_range(emit_range, indent, 0, last_index + 1);
}
// Constant variable names
#define I_COLORS "color"
#define I_KCOLORS "k"

View File

@ -1300,42 +1300,30 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur
// Search the texture cache for textures by address
//
// Find all texture cache entries for the current texture address, and decide whether to use one
// of
// them, or to create a new one
// of them, or to create a new one
//
// In most cases, the fastest way is to use only one texture cache entry for the same address.
// Usually,
// when a texture changes, the old version of the texture is unlikely to be used again. If there
// were
// new cache entries created for normal texture updates, there would be a slowdown due to a huge
// amount
// of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is
// faster than creating a new one from scratch.
// Usually, when a texture changes, the old version of the texture is unlikely to be used again.
// If there were new cache entries created for normal texture updates, there would be a slowdown
// due to a huge amount of unused cache entries. Also thanks to texture pooling, overwriting an
// existing cache entry is faster than creating a new one from scratch.
//
// Some games use the same address for different textures though. If the same cache entry was used
// in
// this case, it would be constantly overwritten, and effectively there wouldn't be any caching
// for
// those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has
// multiple
// sets of fonts on each other stored in a single texture and uses the palette to make different
// characters visible or invisible. In Castlevania 3 some textures are used for 2 different things
// or
// at least in 2 different ways(size 1024x1024 vs 1024x256).
// in this case, it would be constantly overwritten, and effectively there wouldn't be any caching
// for those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has
// multiple sets of fonts on each other stored in a single texture and uses the palette to make
// different characters visible or invisible. In Castlevania 3 some textures are used for 2
// different things or at least in 2 different ways (size 1024x1024 vs 1024x256).
//
// To determine whether to use multiple cache entries or a single entry, use the following
// heuristic:
// If the same texture address is used several times during the same frame, assume the address is
// used
// for different purposes and allow creating an additional cache entry. If there's at least one
// entry
// that hasn't been used for the same frame, then overwrite it, in order to keep the cache as
// small as
// possible. If the current texture is found in the cache, use that entry.
// heuristic: If the same texture address is used several times during the same frame, assume the
// address is used for different purposes and allow creating an additional cache entry. If there's
// at least one entry that hasn't been used for the same frame, then overwrite it, in order to
// keep the cache as small as possible. If the current texture is found in the cache, use that
// entry.
//
// For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else
// it was
// done in vain.
// it was done in vain.
auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress());
TexAddrCache::iterator iter = iter_range.first;
TexAddrCache::iterator oldest_entry = iter;

View File

@ -404,263 +404,95 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
"int4 getKonstColor(State s, StageState ss);\n"
"\n");
// The switch statements in these functions appear to get transformed into an if..else chain
// on NVIDIA's OpenGL/Vulkan drivers, resulting in lower performance than the D3D counterparts.
// Transforming the switch into a binary tree of ifs can increase performance by up to 20%.
if (api_type == APIType::D3D)
{
out.Write("// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n"
" switch (compare) {{\n"
" case 0u: // NEVER\n"
" return false;\n"
" case 1u: // LESS\n"
" return a < b;\n"
" case 2u: // EQUAL\n"
" return a == b;\n"
" case 3u: // LEQUAL\n"
" return a <= b;\n"
" case 4u: // GREATER\n"
" return a > b;\n"
" case 5u: // NEQUAL;\n"
" return a != b;\n"
" case 6u: // GEQUAL\n"
" return a >= b;\n"
" case 7u: // ALWAYS\n"
" return true;\n"
" }}\n"
"}}\n"
"\n"
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" switch (index) {{\n"
" case 0u: // prev.rgb\n"
" return s.Reg[0].rgb;\n"
" case 1u: // prev.aaa\n"
" return s.Reg[0].aaa;\n"
" case 2u: // c0.rgb\n"
" return s.Reg[1].rgb;\n"
" case 3u: // c0.aaa\n"
" return s.Reg[1].aaa;\n"
" case 4u: // c1.rgb\n"
" return s.Reg[2].rgb;\n"
" case 5u: // c1.aaa\n"
" return s.Reg[2].aaa;\n"
" case 6u: // c2.rgb\n"
" return s.Reg[3].rgb;\n"
" case 7u: // c2.aaa\n"
" return s.Reg[3].aaa;\n"
" case 8u:\n"
" return s.TexColor.rgb;\n"
" case 9u:\n"
" return s.TexColor.aaa;\n"
" case 10u:\n"
" return getRasColor(s, ss, colors_0, colors_1).rgb;\n"
" case 11u:\n"
" return getRasColor(s, ss, colors_0, colors_1).aaa;\n"
" case 12u: // One\n"
" return int3(255, 255, 255);\n"
" case 13u: // Half\n"
" return int3(128, 128, 128);\n"
" case 14u:\n"
" return getKonstColor(s, ss).rgb;\n"
" case 15u: // Zero\n"
" return int3(0, 0, 0);\n"
" }}\n"
"}}\n"
"\n"
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" switch (index) {{\n"
" case 0u: // prev.a\n"
" return s.Reg[0].a;\n"
" case 1u: // c0.a\n"
" return s.Reg[1].a;\n"
" case 2u: // c1.a\n"
" return s.Reg[2].a;\n"
" case 3u: // c2.a\n"
" return s.Reg[3].a;\n"
" case 4u:\n"
" return s.TexColor.a;\n"
" case 5u:\n"
" return getRasColor(s, ss, colors_0, colors_1).a;\n"
" case 6u:\n"
" return getKonstColor(s, ss).a;\n"
" case 7u: // Zero\n"
" return 0;\n"
" }}\n"
"}}\n"
"\n"
"int4 getTevReg(in State s, uint index) {{\n"
" switch (index) {{\n"
" case 0u: // prev\n"
" return s.Reg[0];\n"
" case 1u: // c0\n"
" return s.Reg[1];\n"
" case 2u: // c1\n"
" return s.Reg[2];\n"
" case 3u: // c2\n"
" return s.Reg[3];\n"
" default: // prev\n"
" return s.Reg[0];\n"
" }}\n"
"}}\n"
"\n"
"void setRegColor(inout State s, uint index, int3 color) {{\n"
" switch (index) {{\n"
" case 0u: // prev\n"
" s.Reg[0].rgb = color;\n"
" break;\n"
" case 1u: // c0\n"
" s.Reg[1].rgb = color;\n"
" break;\n"
" case 2u: // c1\n"
" s.Reg[2].rgb = color;\n"
" break;\n"
" case 3u: // c2\n"
" s.Reg[3].rgb = color;\n"
" break;\n"
" }}\n"
"}}\n"
"\n"
"void setRegAlpha(inout State s, uint index, int alpha) {{\n"
" switch (index) {{\n"
" case 0u: // prev\n"
" s.Reg[0].a = alpha;\n"
" break;\n"
" case 1u: // c0\n"
" s.Reg[1].a = alpha;\n"
" break;\n"
" case 2u: // c1\n"
" s.Reg[2].a = alpha;\n"
" break;\n"
" case 3u: // c2\n"
" s.Reg[3].a = alpha;\n"
" break;\n"
" }}\n"
"}}\n"
"\n");
}
else
{
out.Write(
"// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n"
" if (compare < 4u) {{\n"
" if (compare < 2u) {{\n"
" return (compare == 0u) ? (false) : (a < b);\n"
" }} else {{\n"
" return (compare == 2u) ? (a == b) : (a <= b);\n"
" }}\n"
" }} else {{\n"
" if (compare < 6u) {{\n"
" return (compare == 4u) ? (a > b) : (a != b);\n"
" }} else {{\n"
" return (compare == 6u) ? (a >= b) : (true);\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" if (index < 8u) {{\n"
" if (index < 4u) {{\n"
" if (index < 2u) {{\n"
" return (index == 0u) ? s.Reg[0].rgb : s.Reg[0].aaa;\n"
" }} else {{\n"
" return (index == 2u) ? s.Reg[1].rgb : s.Reg[1].aaa;\n"
" }}\n"
" }} else {{\n"
" if (index < 6u) {{\n"
" return (index == 4u) ? s.Reg[2].rgb : s.Reg[2].aaa;\n"
" }} else {{\n"
" return (index == 6u) ? s.Reg[3].rgb : s.Reg[3].aaa;\n"
" }}\n"
" }}\n"
" }} else {{\n"
" if (index < 12u) {{\n"
" if (index < 10u) {{\n"
" return (index == 8u) ? s.TexColor.rgb : s.TexColor.aaa;\n"
" }} else {{\n"
" int4 ras = getRasColor(s, ss, colors_0, colors_1);\n"
" return (index == 10u) ? ras.rgb : ras.aaa;\n"
" }}\n"
" }} else {{\n"
" if (index < 14u) {{\n"
" return (index == 12u) ? int3(255, 255, 255) : int3(128, 128, 128);\n"
" }} else {{\n"
" return (index == 14u) ? getKonstColor(s, ss).rgb : int3(0, 0, 0);\n"
" }}\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n"
" if (index < 4u) {{\n"
" if (index < 2u) {{\n"
" return (index == 0u) ? s.Reg[0].a : s.Reg[1].a;\n"
" }} else {{\n"
" return (index == 2u) ? s.Reg[2].a : s.Reg[3].a;\n"
" }}\n"
" }} else {{\n"
" if (index < 6u) {{\n"
" return (index == 4u) ? s.TexColor.a : getRasColor(s, ss, colors_0, colors_1).a;\n"
" }} else {{\n"
" return (index == 6u) ? getKonstColor(s, ss).a : 0;\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"int4 getTevReg(in State s, uint index) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" return s.Reg[0];\n"
" }} else {{\n"
" return s.Reg[1];\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" return s.Reg[2];\n"
" }} else {{\n"
" return s.Reg[3];\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"void setRegColor(inout State s, uint index, int3 color) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" s.Reg[0].rgb = color;\n"
" }} else {{\n"
" s.Reg[1].rgb = color;\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" s.Reg[2].rgb = color;\n"
" }} else {{\n"
" s.Reg[3].rgb = color;\n"
" }}\n"
" }}\n"
"}}\n"
"\n"
"void setRegAlpha(inout State s, uint index, int alpha) {{\n"
" if (index < 2u) {{\n"
" if (index == 0u) {{\n"
" s.Reg[0].a = alpha;\n"
" }} else {{\n"
" s.Reg[1].a = alpha;\n"
" }}\n"
" }} else {{\n"
" if (index == 2u) {{\n"
" s.Reg[2].a = alpha;\n"
" }} else {{\n"
" s.Reg[3].a = alpha;\n"
" }}\n"
" }}\n"
"}}\n"
"\n");
}
static constexpr Common::EnumMap<std::string_view, CompareMode::Always> tev_alpha_funcs_table{
"return false;", // CompareMode::Never
"return a < b;", // CompareMode::Less
"return a == b;", // CompareMode::Equal
"return a <= b;", // CompareMode::LEqual
"return a > b;", // CompareMode::Greater
"return a != b;", // CompareMode::NEqual
"return a >= b;", // CompareMode::GEqual
"return true;" // CompareMode::Always
};
static constexpr Common::EnumMap<std::string_view, TevColorArg::Zero> tev_c_input_table{
"return s.Reg[0].rgb;", // CPREV,
"return s.Reg[0].aaa;", // APREV,
"return s.Reg[1].rgb;", // C0,
"return s.Reg[1].aaa;", // A0,
"return s.Reg[2].rgb;", // C1,
"return s.Reg[2].aaa;", // A1,
"return s.Reg[3].rgb;", // C2,
"return s.Reg[3].aaa;", // A2,
"return s.TexColor.rgb;", // TEXC,
"return s.TexColor.aaa;", // TEXA,
"return getRasColor(s, ss, colors_0, colors_1).rgb;", // RASC,
"return getRasColor(s, ss, colors_0, colors_1).aaa;", // RASA,
"return int3(255, 255, 255);", // ONE
"return int3(128, 128, 128);", // HALF
"return getKonstColor(s, ss).rgb;", // KONST
"return int3(0, 0, 0);", // ZERO
};
static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_table{
"return s.Reg[0].a;", // APREV,
"return s.Reg[1].a;", // A0,
"return s.Reg[2].a;", // A1,
"return s.Reg[3].a;", // A2,
"return s.TexColor.a;", // TEXA,
"return getRasColor(s, ss, colors_0, colors_1).a;", // RASA,
"return getKonstColor(s, ss).a;", // KONST, (hw1 had quarter)
"return 0;", // ZERO
};
// Body statements for the generated shader function getTevReg(): entry i returns the
// whole int4 register s.Reg[i]. The table is handed to WriteSwitch keyed on "index".
static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_regs_lookup_table{
"return s.Reg[0];",
"return s.Reg[1];",
"return s.Reg[2];",
"return s.Reg[3];",
};
// Body statements for the generated shader function setRegColor(): entry i stores the
// `color` argument into the rgb channels of s.Reg[i]. Handed to WriteSwitch keyed on "index".
static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_c_set_table{
"s.Reg[0].rgb = color;",
"s.Reg[1].rgb = color;",
"s.Reg[2].rgb = color;",
"s.Reg[3].rgb = color;",
};
// Body statements for the generated shader function setRegAlpha(): entry i stores the
// `alpha` argument into the alpha channel of s.Reg[i]. Handed to WriteSwitch keyed on "index".
static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_a_set_table{
"s.Reg[0].a = alpha;",
"s.Reg[1].a = alpha;",
"s.Reg[2].a = alpha;",
"s.Reg[3].a = alpha;",
};
out.Write("// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n");
WriteSwitch(out, api_type, "compare", tev_alpha_funcs_table, 2, false);
out.Write("}}\n"
"\n"
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n");
WriteSwitch(out, api_type, "index", tev_c_input_table, 2, false);
out.Write("}}\n"
"\n"
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
"uint index) {{\n");
WriteSwitch(out, api_type, "index", tev_a_input_table, 2, false);
out.Write("}}\n"
"\n"
"int4 getTevReg(in State s, uint index) {{\n");
WriteSwitch(out, api_type, "index", tev_regs_lookup_table, 2, false);
out.Write("}}\n"
"\n"
"void setRegColor(inout State s, uint index, int3 color) {{\n");
WriteSwitch(out, api_type, "index", tev_c_set_table, 2, true);
out.Write("}}\n"
"\n"
"void setRegAlpha(inout State s, uint index, int alpha) {{\n");
WriteSwitch(out, api_type, "index", tev_a_set_table, 2, true);
out.Write("}}\n"
"\n");
// Since the fixed-point texture coordinate variables aren't global, we need to pass
// them to the select function. This applies to all backends.
@ -1284,78 +1116,59 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
if (use_shader_blend)
{
static constexpr std::array<std::string_view, 8> blendSrcFactor{{
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
}};
static constexpr std::array<std::string_view, 8> blendSrcFactorAlpha{{
"0.0;", // ZERO
"1.0;", // ONE
"initial_ocol0.a;", // DSTCLR
"1.0 - initial_ocol0.a;", // INVDSTCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
}};
static constexpr std::array<std::string_view, 8> blendDstFactor{{
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"ocol1.aaa;", // SRCALHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
}};
static constexpr std::array<std::string_view, 8> blendDstFactorAlpha{{
"0.0;", // ZERO
"1.0;", // ONE
"ocol0.a;", // SRCCLR
"1.0 - ocol0.a;", // INVSRCCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
}};
using Common::EnumMap;
static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactor{
"blend_src.rgb = float3(0,0,0);", // ZERO
"blend_src.rgb = float3(1,1,1);", // ONE
"blend_src.rgb = initial_ocol0.rgb;", // DSTCLR
"blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"blend_src.rgb = ocol1.aaa;", // SRCALPHA
"blend_src.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"blend_src.rgb = initial_ocol0.aaa;", // DSTALPHA
"blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactorAlpha{
"blend_src.a = 0.0;", // ZERO
"blend_src.a = 1.0;", // ONE
"blend_src.a = initial_ocol0.a;", // DSTCLR
"blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTCLR
"blend_src.a = ocol1.a;", // SRCALPHA
"blend_src.a = 1.0 - ocol1.a;", // INVSRCALPHA
"blend_src.a = initial_ocol0.a;", // DSTALPHA
"blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
};
// Shader statements that assign blend_dst.rgb, one per DstBlendFactor value.
// The table is handed to WriteSwitch keyed on "blend_dst_factor".
static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactor{
"blend_dst.rgb = float3(0,0,0);", // ZERO
"blend_dst.rgb = float3(1,1,1);", // ONE
"blend_dst.rgb = ocol0.rgb;", // SRCCLR
"blend_dst.rgb = float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"blend_dst.rgb = ocol1.aaa;", // SRCALPHA
"blend_dst.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"blend_dst.rgb = initial_ocol0.aaa;", // DSTALPHA
"blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactorAlpha{
"blend_dst.a = 0.0;", // ZERO
"blend_dst.a = 1.0;", // ONE
"blend_dst.a = ocol0.a;", // SRCCLR
"blend_dst.a = 1.0 - ocol0.a;", // INVSRCCLR
"blend_dst.a = ocol1.a;", // SRCALPHA
"blend_dst.a = 1.0 - ocol1.a;", // INVSRCALPHA
"blend_dst.a = initial_ocol0.a;", // DSTALPHA
"blend_dst.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write(" if (blend_enable) {{\n"
" float4 blend_src;\n"
" switch (blend_src_factor) {{\n");
for (size_t i = 0; i < blendSrcFactor.size(); i++)
{
out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]);
}
" float4 blend_src;\n");
WriteSwitch(out, api_type, "blend_src_factor", blendSrcFactor, 4, true);
WriteSwitch(out, api_type, "blend_src_factor_alpha", blendSrcFactorAlpha, 4, true);
out.Write(" }}\n"
" switch (blend_src_factor_alpha) {{\n");
for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++)
{
out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]);
}
out.Write(" }}\n"
" float4 blend_dst;\n"
" switch (blend_dst_factor) {{\n");
for (size_t i = 0; i < blendDstFactor.size(); i++)
{
out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]);
}
out.Write(" }}\n"
" switch (blend_dst_factor_alpha) {{\n");
for (size_t i = 0; i < blendDstFactorAlpha.size(); i++)
{
out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]);
}
out.Write(" float4 blend_dst;\n");
WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true);
WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true);
out.Write(
" }}\n"
" float4 blend_result;\n"
" if (blend_subtract)\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n"

View File

@ -91,7 +91,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = nat_offset;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
nat_offset += 4;
}
@ -110,7 +110,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset;
m_native_vtx_decl.position.type = VAR_FLOAT;
m_native_vtx_decl.position.type = ComponentFormat::Float;
m_native_vtx_decl.position.integer = false;
nat_offset += pos_elements * sizeof(float);
@ -134,7 +134,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true;
m_native_vtx_decl.normals[i].offset = nat_offset;
m_native_vtx_decl.normals[i].type = VAR_FLOAT;
m_native_vtx_decl.normals[i].type = ComponentFormat::Float;
m_native_vtx_decl.normals[i].integer = false;
nat_offset += 12;
}
@ -143,7 +143,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
TPipelineFunction pFunc =
@ -166,7 +166,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;
const auto tc = m_VtxDesc.high.TexCoord[i].Value();

View File

@ -6,6 +6,7 @@
#include <array>
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"
@ -59,7 +60,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
WriteProtect();
}
void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg)
void VertexLoaderARM64::GetVertexAddr(CPArray array, VertexComponentFormat attribute, ARM64Reg reg)
{
if (IsIndexed(attribute))
{
@ -95,7 +96,7 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
REV16(scratch1_reg, scratch1_reg);
}
if (array == ARRAY_POSITION)
if (array == CPArray::Position)
{
EOR(scratch2_reg, scratch1_reg,
attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) :
@ -103,17 +104,18 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
m_skip_vertex = CBZ(scratch2_reg);
}
LDR(IndexType::Unsigned, scratch2_reg, stride_reg, array * 4);
LDR(IndexType::Unsigned, scratch2_reg, stride_reg, static_cast<u8>(array) * 4);
MUL(scratch1_reg, scratch1_reg, scratch2_reg);
LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8);
LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg,
static_cast<u8>(array) * 8);
ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
}
else
ADD(reg, src_reg, m_src_ofs);
}
s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute,
s32 VertexLoaderARM64::GetAddressImm(CPArray array, VertexComponentFormat attribute,
Arm64Gen::ARM64Reg reg, u32 align)
{
if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1))))
@ -219,7 +221,7 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
native_format->components = count_out;
native_format->enable = true;
native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT;
native_format->type = ComponentFormat::Float;
native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out;
@ -403,8 +405,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
MOV(skipped_reg, ARM64Reg::WZR);
MOV(saved_count, count_reg);
MOVP2R(stride_reg, g_main_cp_state.array_strides);
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases);
MOVP2R(stride_reg, g_main_cp_state.array_strides.data());
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data());
if (need_scale)
MOVP2R(scale_reg, scale_factors);
@ -427,7 +429,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
@ -448,8 +450,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes);
load_size <<= 3;
s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg),
load_size);
s32 offset = GetAddressImm(CPArray::Position, m_VtxDesc.low.Position,
EncodeRegTo64(scratch1_reg), load_size);
ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset);
}
@ -470,7 +472,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_bytes = elem_size * 3;
int load_size = GetLoadSize(load_bytes);
offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
offset = GetAddressImm(CPArray::Normal, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
load_size << 3);
if (offset == -1)
@ -488,10 +490,10 @@ void VertexLoaderARM64::GenerateVertexLoader()
}
}
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
@ -501,22 +503,22 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_VtxAttr.GetColorFormat(i) == ColorFormat::RGBA4444)
align = 2;
s32 offset = GetAddressImm(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i],
s32 offset = GetAddressImm(CPArray::Color0 + i, m_VtxDesc.low.Color[i],
EncodeRegTo64(scratch1_reg), align);
ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset);
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;
int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 1 : 2;
@ -527,7 +529,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes);
load_size <<= 3;
s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i],
s32 offset = GetAddressImm(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i],
EncodeRegTo64(scratch1_reg), load_size);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
@ -538,7 +540,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
{
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;
LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]);

View File

@ -11,6 +11,7 @@ class DataReader;
enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;
enum class CPArray : u8;
class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock
{
@ -25,8 +26,9 @@ private:
u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align);
void GetVertexAddr(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg,
u32 align);
int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in,
int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset = -1);

View File

@ -12,16 +12,14 @@
#include <utility>
#include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
@ -48,14 +46,21 @@ static std::mutex s_vertex_loader_map_lock;
static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far.
u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS];
Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
BitSet8 g_main_vat_dirty;
BitSet8 g_preprocess_vat_dirty;
bool g_bases_dirty; // Main only
u8 g_current_vat; // Main only
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
void Init()
{
MarkAllDirty();
for (auto& map_entry : g_main_cp_state.vertex_loaders)
for (auto& map_entry : g_main_vertex_loaders)
map_entry = nullptr;
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
for (auto& map_entry : g_preprocess_vertex_loaders)
map_entry = nullptr;
SETSTAT(g_stats.num_vertex_loaders, 0);
}
@ -70,7 +75,7 @@ void Clear()
void UpdateVertexArrayPointers()
{
// Anything to update?
if (!g_main_cp_state.bases_dirty)
if (!g_bases_dirty)
return;
// Some games such as Burnout 2 can put invalid addresses into
@ -80,27 +85,28 @@ void UpdateVertexArrayPointers()
// 12 through 15 are used for loading data into xfmem.
// We also only update the array base if the vertex description states we are going to use it.
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position))
cached_arraybases[ARRAY_POSITION] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]);
cached_arraybases[CPArray::Position] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Position]);
if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal))
cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]);
cached_arraybases[CPArray::Normal] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Normal]);
for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
for (u8 i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
{
if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i]))
cached_arraybases[ARRAY_COLOR0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR0 + i]);
cached_arraybases[CPArray::Color0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Color0 + i]);
}
for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
{
if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i]))
cached_arraybases[ARRAY_TEXCOORD0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]);
cached_arraybases[CPArray::TexCoord0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]);
}
g_main_cp_state.bases_dirty = false;
g_bases_dirty = false;
}
namespace
@ -115,8 +121,8 @@ struct entry
void MarkAllDirty()
{
g_main_cp_state.attr_dirty = BitSet32::AllTrue(8);
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8);
g_main_vat_dirty = BitSet8::AllTrue(8);
g_preprocess_vat_dirty = BitSet8::AllTrue(8);
}
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
@ -140,7 +146,8 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
std::memset(&new_decl, 0, sizeof(new_decl));
new_decl.stride = decl.stride;
auto MakeDummyAttribute = [](AttributeFormat& attr, VarType type, int components, bool integer) {
auto MakeDummyAttribute = [](AttributeFormat& attr, ComponentFormat type, int components,
bool integer) {
attr.type = type;
attr.components = components;
attr.offset = 0;
@ -158,32 +165,32 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
if (decl.position.enable)
CopyAttribute(new_decl.position, decl.position);
else
MakeDummyAttribute(new_decl.position, VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.position, ComponentFormat::Float, 1, false);
for (size_t i = 0; i < std::size(new_decl.normals); i++)
{
if (decl.normals[i].enable)
CopyAttribute(new_decl.normals[i], decl.normals[i]);
else
MakeDummyAttribute(new_decl.normals[i], VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.normals[i], ComponentFormat::Float, 1, false);
}
for (size_t i = 0; i < std::size(new_decl.colors); i++)
{
if (decl.colors[i].enable)
CopyAttribute(new_decl.colors[i], decl.colors[i]);
else
MakeDummyAttribute(new_decl.colors[i], VAR_UNSIGNED_BYTE, 4, false);
MakeDummyAttribute(new_decl.colors[i], ComponentFormat::UByte, 4, false);
}
for (size_t i = 0; i < std::size(new_decl.texcoords); i++)
{
if (decl.texcoords[i].enable)
CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]);
else
MakeDummyAttribute(new_decl.texcoords[i], VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.texcoords[i], ComponentFormat::Float, 1, false);
}
if (decl.posmtx.enable)
CopyAttribute(new_decl.posmtx, decl.posmtx);
else
MakeDummyAttribute(new_decl.posmtx, VAR_UNSIGNED_BYTE, 1, true);
MakeDummyAttribute(new_decl.posmtx, ComponentFormat::UByte, 1, true);
return GetOrCreateMatchingFormat(new_decl);
}
@ -191,10 +198,12 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
{
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
state->last_id = vtx_attr_group;
BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
// Select the loader table that belongs to the state being refreshed, mirroring the
// attr_dirty selection: the preprocess path must not read or overwrite the main
// thread's cached loaders (and vice versa).
auto& vertex_loaders = preprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
g_current_vat = vtx_attr_group;
VertexLoaderBase* loader;
if (state->attr_dirty[vtx_attr_group])
if (attr_dirty[vtx_attr_group])
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread
@ -224,12 +233,12 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get();
}
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty[vtx_attr_group] = false;
vertex_loaders[vtx_attr_group] = loader;
attr_dirty[vtx_attr_group] = false;
}
else
{
loader = state->vertex_loaders[vtx_attr_group];
loader = vertex_loaders[vtx_attr_group];
}
// Lookup pointers for any vertex arrays.
@ -239,7 +248,8 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
return loader;
}
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess)
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess)
{
if (!count)
return 0;
@ -266,7 +276,8 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5);
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
@ -287,147 +298,3 @@ NativeVertexFormat* GetCurrentVertexFormat()
}
} // namespace VertexLoaderManager
// Applies a write of `value` to the CP register addressed by `sub_cmd`.
//
// sub_cmd:       full register address; its CP_COMMAND_MASK bits select the register group
//                and the remaining bits select the VAT/array index within that group.
// value:         raw 32-bit value being written.
// is_preprocess: when true the write goes to g_preprocess_cp_state and the
//                VertexShaderManager notifications are skipped; otherwise
//                g_main_cp_state is updated.
//
// Unexpected or unknown register addresses are reported as game quirks and logged.
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
bool update_global_state = !is_preprocess;
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
switch (sub_cmd & CP_COMMAND_MASK)
{
case UNKNOWN_00:
case UNKNOWN_10:
case UNKNOWN_20:
// No state is stored for these registers; they are only reported.
if (!(sub_cmd == UNKNOWN_20 && value == 0))
{
// All titles using libogc or the official SDK issue 0x20 with value=0 on startup
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
sub_cmd);
}
break;
case MATINDEX_A:
// The low bits of sub_cmd are expected to be zero; warn otherwise but still apply the write.
if (sub_cmd != MATINDEX_A)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_A: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_A, sub_cmd);
}
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedA(value);
break;
case MATINDEX_B:
// Same handling as MATINDEX_A, for the second matrix index register.
if (sub_cmd != MATINDEX_B)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_B: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_B, sub_cmd);
}
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedB(value);
break;
case VCD_LO:
if (sub_cmd != VCD_LO) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_LO: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_LO, sub_cmd);
}
// A vertex descriptor write marks every VAT entry and the array bases as dirty.
state->vtx_desc.low.Hex = value;
state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
state->bases_dirty = true;
break;
case VCD_HI:
if (sub_cmd != VCD_HI) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_HI: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_HI, sub_cmd);
}
// Same invalidation as VCD_LO, for the high half of the vertex descriptor.
state->vtx_desc.high.Hex = value;
state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
state->bases_dirty = true;
break;
case CP_VAT_REG_A:
// An out-of-range VAT index is reported, but the write still proceeds using the
// index masked with CP_VAT_MASK.
if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
}
state->vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;
case CP_VAT_REG_B:
// Same handling as CP_VAT_REG_A, writing group g1 of the selected VAT.
if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
}
state->vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;
case CP_VAT_REG_C:
// Same handling as CP_VAT_REG_A, writing group g2 of the selected VAT.
if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
}
state->vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;
// Pointers to vertex arrays in GC RAM
case ARRAY_BASE:
// The address is restricted to the physical address mask before being stored.
state->array_bases[sub_cmd & CP_ARRAY_MASK] =
value & CommandProcessor::GetPhysicalAddressMask();
state->bases_dirty = true;
break;
case ARRAY_STRIDE:
// Only the low 8 bits of the stride are kept.
state->array_strides[sub_cmd & CP_ARRAY_MASK] = value & 0xFF;
break;
default:
// Register group not handled above: report it and otherwise ignore the write.
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
}
}
// Writes the register values held in g_main_cp_state into `memory`, using each
// register's address (MATINDEX_A, VCD_LO, CP_VAT_REG_A + n, ARRAY_BASE + n, ...) as the
// index. The caller must supply an array large enough for every address written here.
void FillCPMemoryArray(u32* memory)
{
  const auto& cp = g_main_cp_state;

  // Matrix index and vertex descriptor registers.
  memory[MATINDEX_A] = cp.matrix_index_a.Hex;
  memory[MATINDEX_B] = cp.matrix_index_b.Hex;
  memory[VCD_LO] = cp.vtx_desc.low.Hex;
  memory[VCD_HI] = cp.vtx_desc.high.Hex;

  // The three register groups of every vertex attribute table.
  for (int vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    const auto& attr = cp.vtx_attr[vat];
    memory[CP_VAT_REG_A + vat] = attr.g0.Hex;
    memory[CP_VAT_REG_B + vat] = attr.g1.Hex;
    memory[CP_VAT_REG_C + vat] = attr.g2.Hex;
  }

  // Base address and stride of every vertex array.
  for (int arr = 0; arr < CP_NUM_ARRAYS; ++arr)
  {
    memory[ARRAY_BASE + arr] = cp.array_bases[arr];
    memory[ARRAY_STRIDE + arr] = cp.array_strides[arr];
  }
}

View File

@ -3,17 +3,24 @@
#pragma once
#include <array>
#include <memory>
#include <string>
#include <unordered_map>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/CPMemory.h"
class DataReader;
class NativeVertexFormat;
struct PortableVertexDeclaration;
// Forward declaration of the primitive enum so it can be named in the RunVertices
// signature below; the enumerators themselves are defined elsewhere.
namespace OpcodeDecoder
{
enum class Primitive : u8;
} // namespace OpcodeDecoder
namespace VertexLoaderManager
{
using NativeVertexFormatMap =
@ -35,12 +42,13 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
// Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess);
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess);
NativeVertexFormat* GetCurrentVertexFormat();
// Resolved pointers to array bases. Used by vertex loaders.
extern u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS];
extern Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
void UpdateVertexArrayPointers();
// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
@ -50,4 +58,11 @@ extern u32 position_matrix_index[4];
// VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components;
extern BitSet8 g_main_vat_dirty;
extern BitSet8 g_preprocess_vat_dirty;
extern bool g_bases_dirty; // Main only
extern u8 g_current_vat; // Main only
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
} // namespace VertexLoaderManager

View File

@ -15,6 +15,7 @@
#include "Common/JitRegister.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"
@ -54,7 +55,7 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att)
JitRegister::Register(region, GetCodePtr(), name.c_str());
}
OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute)
OpArg VertexLoaderX64::GetVertexAddr(CPArray array, VertexComponentFormat attribute)
{
OpArg data = MDisp(src_reg, m_src_ofs);
if (IsIndexed(attribute))
@ -62,7 +63,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute)
int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16;
LoadAndSwap(bits, scratch1, data);
m_src_ofs += bits / 8;
if (array == ARRAY_POSITION)
if (array == CPArray::Position)
{
CMP(bits, R(scratch1), Imm8(-1));
m_skip_vertex = J_CC(CC_E, true);
@ -121,7 +122,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
native_format->components = count_out;
native_format->enable = true;
native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT;
native_format->type = ComponentFormat::Float;
native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out;
@ -420,7 +421,7 @@ void VertexLoaderX64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
@ -433,7 +434,7 @@ void VertexLoaderX64::GenerateVertexLoader()
texmatidx_ofs[i] = m_src_ofs++;
}
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position);
OpArg data = GetVertexAddr(CPArray::Position, m_VtxDesc.low.Position);
int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 2 : 3;
ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position);
@ -448,7 +449,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{
if (!i || m_VtxAttr.g0.NormalIndex3)
{
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal);
data = GetVertexAddr(CPArray::Normal, m_VtxDesc.low.Normal);
int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat);
data.AddMemOffset(i * elem_size * 3);
}
@ -457,27 +458,27 @@ void VertexLoaderX64::GenerateVertexLoader()
}
}
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i]);
data = GetVertexAddr(CPArray::Color0 + i, m_VtxDesc.low.Color[i]);
ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i));
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1;
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]);
data = GetVertexAddr(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i]);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.g0.ByteDequant,
@ -487,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)

View File

@ -10,6 +10,7 @@
enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;
enum class CPArray : u8;
class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
{
@ -23,7 +24,7 @@ private:
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute);
Gen::OpArg GetVertexAddr(CPArray array, VertexComponentFormat attribute);
int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format);

View File

@ -6,6 +6,7 @@
#include <cstring>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
@ -78,8 +79,8 @@ void Color_ReadIndex_16b_565(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
u16 value;
std::memcpy(&value, address, sizeof(u16));
@ -91,8 +92,8 @@ template <typename I>
void Color_ReadIndex_24b_888(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address));
}
@ -100,18 +101,18 @@ template <typename I>
void Color_ReadIndex_32b_888x(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address));
}
template <typename I>
void Color_ReadIndex_16b_4444(VertexLoader* loader)
{
auto const index = DataRead<I>();
const auto index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
u16 value;
std::memcpy(&value, address, sizeof(u16));
@ -123,9 +124,9 @@ template <typename I>
void Color_ReadIndex_24b_6666(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* data = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]) - 1;
const u32 val = Common::swap32(data);
const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
const u32 val = Common::swap24(data);
SetCol6666(loader, val);
}
@ -133,8 +134,8 @@ template <typename I>
void Color_ReadIndex_32b_8888(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read32(address));
}
@ -166,7 +167,7 @@ void Color_ReadDirect_16b_4444(VertexLoader* loader)
void Color_ReadDirect_24b_6666(VertexLoader* loader)
{
SetCol6666(loader, Common::swap32(DataGetPosition() - 1));
SetCol6666(loader, Common::swap24(DataGetPosition()));
DataSkip(3);
}
@ -175,21 +176,40 @@ void Color_ReadDirect_32b_8888(VertexLoader* loader)
SetCol(loader, DataReadU32Unswapped());
}
constexpr TPipelineFunction s_table_read_color[4][6] = {
{nullptr, nullptr, nullptr, nullptr, nullptr, nullptr},
{Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x,
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888},
{Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>,
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>},
{Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>,
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>, Color_ReadIndex_32b_8888<u16>},
using Common::EnumMap;
// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
// Identity helper: returns its argument unchanged. Its only purpose is to give a braced list of
// reader functions an explicit EnumMap<TPipelineFunction, ColorFormat::RGBA8888> type (via the
// parameter) so that it can be used as one row when initializing a nested EnumMap.
constexpr EnumMap<TPipelineFunction, ColorFormat::RGBA8888>
f(EnumMap<TPipelineFunction, ColorFormat::RGBA8888> in)
{
return in;
}
// Identity helper for u32 rows: returns its argument unchanged. Passing a braced list through
// g(...) converts it to EnumMap<u32, ColorFormat::RGBA8888> so it can be used as one row when
// initializing a nested EnumMap.
constexpr EnumMap<u32, ColorFormat::RGBA8888> g(EnumMap<u32, ColorFormat::RGBA8888> in)
{
return in;
}
// Two-level lookup table of T: the outer map is keyed by VertexComponentFormat (up to Index16)
// and each inner map is keyed by ColorFormat (up to RGBA8888), i.e. table[type][format].
template <typename T>
using Table = EnumMap<EnumMap<T, ColorFormat::RGBA8888>, VertexComponentFormat::Index16>;
// Color reader functions, looked up as s_table_read_color[type][format].
// One row per VertexComponentFormat, six entries per row (one per ColorFormat):
// 565, 888, 888x, 4444, 6666, 8888.
constexpr Table<TPipelineFunction> s_table_read_color = {
// First row: no reader function (presumably VertexComponentFormat::NotPresent).
f({nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}),
// Color data read directly from the vertex stream.
f({Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x,
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}),
// Color fetched from the color array through a u8 index.
f({Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>,
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>}),
// Color fetched from the color array through a u16 index.
f({Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>,
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>,
Color_ReadIndex_32b_8888<u16>}),
};
constexpr u32 s_table_read_color_vertex_size[4][6] = {
{0, 0, 0, 0, 0, 0},
{2, 3, 4, 2, 3, 4},
{1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2},
// Size of a color attribute in the vertex stream (presumably in bytes), looked up as
// s_table_read_color_vertex_size[type][format]. Rows parallel s_table_read_color.
constexpr Table<u32> s_table_read_color_vertex_size = {
// First row (no reader function): nothing is read.
g({0u, 0u, 0u, 0u, 0u, 0u}),
// Direct: matches the 16/24/32-bit widths of the corresponding reader functions.
g({2u, 3u, 4u, 2u, 3u, 4u}),
// Indexed with u8: matches sizeof(u8), regardless of color format.
g({1u, 1u, 1u, 1u, 1u, 1u}),
// Indexed with u16: matches sizeof(u16), regardless of color format.
g({2u, 2u, 2u, 2u, 2u, 2u}),
};
} // Anonymous namespace
@ -200,7 +220,7 @@ u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format)
PanicAlertFmt("Invalid color format {}", format);
return 0;
}
return s_table_read_color_vertex_size[u32(type)][u32(format)];
return s_table_read_color_vertex_size[type][format];
}
TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format)
@ -210,5 +230,5 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co
PanicAlertFmt("Invalid color format {}", format);
return nullptr;
}
return s_table_read_color[u32(type)][u32(format)];
return s_table_read_color[type][format];
}

View File

@ -7,6 +7,7 @@
#include <type_traits>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
@ -58,7 +59,7 @@ struct Normal_Direct
{
static void function([[maybe_unused]] VertexLoader* loader)
{
auto const source = reinterpret_cast<const T*>(DataGetPosition());
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>();
}
@ -71,10 +72,10 @@ void Normal_Index_Offset()
{
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] +
(index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
const auto index = DataRead<I>();
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[CPArray::Normal] +
(index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
}
@ -98,39 +99,6 @@ struct Normal_Index_Indices3
static constexpr u32 size = sizeof(I) * 3;
};
enum NormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};
enum NormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};
enum NormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};
enum NormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};
struct Set
{
template <typename T>
@ -145,83 +113,88 @@ struct Set
TPipelineFunction function;
};
using Formats = std::array<Set, NUM_NRM_FORMAT>;
using Elements = std::array<Formats, NUM_NRM_ELEMENTS>;
using Indices = std::array<Elements, NUM_NRM_INDICES>;
using Types = std::array<Indices, NUM_NRM_TYPE>;
using Common::EnumMap;
using Formats = EnumMap<Set, ComponentFormat::Float>;
using Elements = EnumMap<Formats, NormalComponentCount::NBT>;
using Indices = std::array<Elements, 2>;
using Types = EnumMap<Indices, VertexComponentFormat::Index16>;
constexpr Types InitializeTable()
{
Types table{};
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
using VCF = VertexComponentFormat;
using NCC = NormalComponentCount;
using FMT = ComponentFormat;
// Same as above
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][false][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
// Same as above, since there are no indices
table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][true][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();
// Same as above for NRM_NBT
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][false][NCC::NBT][FMT::UByte] = Normal_Index<u8, u8, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Byte] = Normal_Index<u8, s8, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::UShort] = Normal_Index<u8, u16, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Short] = Normal_Index<u8, s16, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Float] = Normal_Index<u8, float, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
// Same for NormalComponentCount::N; differs for NBT
table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u8, u8>();
table[VCF::Index8][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u8, s8>();
table[VCF::Index8][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u8, u16>();
table[VCF::Index8][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u8, s16>();
table[VCF::Index8][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u8, float>();
// Same as above for NRM_NBT
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][false][NCC::NBT][FMT::UByte] = Normal_Index<u16, u8, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Byte] = Normal_Index<u16, s8, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::UShort] = Normal_Index<u16, u16, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Short] = Normal_Index<u16, s16, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Float] = Normal_Index<u16, float, 3>();
// Same for NormalComponentCount::N; differs for NBT
table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u16, u8>();
table[VCF::Index16][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u16, s8>();
table[VCF::Index16][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u16, u16>();
table[VCF::Index16][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u16, s16>();
table[VCF::Index16][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u16, float>();
return table;
}
@ -230,14 +203,14 @@ constexpr Types s_table = InitializeTable();
} // Anonymous namespace
u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3)
NormalComponentCount elements, bool index3)
{
return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size;
return s_table[type][index3][elements][format].gc_size;
}
TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type,
ComponentFormat format,
NormalComponentCount elements, u32 index3)
NormalComponentCount elements, bool index3)
{
return s_table[u32(type)][index3][u32(elements)][u32(format)].function;
return s_table[type][index3][elements][format].function;
}

View File

@ -14,8 +14,8 @@ class VertexLoader_Normal
{
public:
static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);
NormalComponentCount elements, bool index3);
static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);
NormalComponentCount elements, bool index3);
};

View File

@ -7,6 +7,7 @@
#include <type_traits>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Swap.h"
#include "VideoCommon/DataReader.h"
@ -59,8 +60,8 @@ void Pos_ReadIndex(VertexLoader* loader)
const auto index = DataRead<I>();
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
const auto data =
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] +
(index * g_main_cp_state.array_strides[ARRAY_POSITION]));
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[CPArray::Position] +
(index * g_main_cp_state.array_strides[CPArray::Position]));
const auto scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
@ -76,138 +77,109 @@ void Pos_ReadIndex(VertexLoader* loader)
LOG_VTX();
}
constexpr TPipelineFunction s_table_read_position[4][8][2] = {
{
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
},
{
{
Pos_ReadDirect<u8, 2>,
Pos_ReadDirect<u8, 3>,
},
{
Pos_ReadDirect<s8, 2>,
Pos_ReadDirect<s8, 3>,
},
{
Pos_ReadDirect<u16, 2>,
Pos_ReadDirect<u16, 3>,
},
{
Pos_ReadDirect<s16, 2>,
Pos_ReadDirect<s16, 3>,
},
{
Pos_ReadDirect<float, 2>,
Pos_ReadDirect<float, 3>,
},
},
{
{
Pos_ReadIndex<u8, u8, 2>,
Pos_ReadIndex<u8, u8, 3>,
},
{
Pos_ReadIndex<u8, s8, 2>,
Pos_ReadIndex<u8, s8, 3>,
},
{
Pos_ReadIndex<u8, u16, 2>,
Pos_ReadIndex<u8, u16, 3>,
},
{
Pos_ReadIndex<u8, s16, 2>,
Pos_ReadIndex<u8, s16, 3>,
},
{
Pos_ReadIndex<u8, float, 2>,
Pos_ReadIndex<u8, float, 3>,
},
},
{
{
Pos_ReadIndex<u16, u8, 2>,
Pos_ReadIndex<u16, u8, 3>,
},
{
Pos_ReadIndex<u16, s8, 2>,
Pos_ReadIndex<u16, s8, 3>,
},
{
Pos_ReadIndex<u16, u16, 2>,
Pos_ReadIndex<u16, u16, 3>,
},
{
Pos_ReadIndex<u16, s16, 2>,
Pos_ReadIndex<u16, s16, 3>,
},
{
Pos_ReadIndex<u16, float, 2>,
Pos_ReadIndex<u16, float, 3>,
},
},
using Common::EnumMap;
// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
// Builds the innermost row of the position reader table: one reader function for
// CoordComponentCount::XY followed by one for CoordComponentCount::XYZ.
constexpr EnumMap<TPipelineFunction, CoordComponentCount::XYZ> e(TPipelineFunction xy,
                                                                 TPipelineFunction xyz)
{
  using Row = EnumMap<TPipelineFunction, CoordComponentCount::XYZ>;
  return Row{xy, xyz};
}
// Builds the innermost row of the position size table: the size for CoordComponentCount::XY
// followed by the size for CoordComponentCount::XYZ.
constexpr EnumMap<u32, CoordComponentCount::XYZ> e(u32 xy, u32 xyz)
{
  using Row = EnumMap<u32, CoordComponentCount::XYZ>;
  return Row{xy, xyz};
}
// Identity helper: returns its argument unchanged. Passing a braced list of e(...) rows through
// f(...) converts it to the per-ComponentFormat map of reader functions, so it can be used as one
// row when initializing the outer (per-VertexComponentFormat) EnumMap.
constexpr EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float>
f(EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
{
return in;
}
// Identity helper for u32 tables: returns its argument unchanged. Passing a braced list of
// e(...) rows through g(...) converts it to the per-ComponentFormat map of sizes, so it can be
// used as one row when initializing the outer (per-VertexComponentFormat) EnumMap.
constexpr EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float>
g(EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
{
return in;
}
// Three-level lookup table of T, used as table[type][format][elements]: keyed by
// VertexComponentFormat (up to Index16), then ComponentFormat (up to Float), then
// CoordComponentCount (up to XYZ).
template <typename T>
using Table = EnumMap<EnumMap<EnumMap<T, CoordComponentCount::XYZ>, ComponentFormat::Float>,
VertexComponentFormat::Index16>;
// Position reader functions, looked up as s_table_read_position[type][format][elements].
// Each f({...}) group is one VertexComponentFormat; inside a group there is one e(xy, xyz) row
// per component type, in the order u8, s8, u16, s16, float.
constexpr Table<TPipelineFunction> s_table_read_position = {
// First group: no reader function (presumably VertexComponentFormat::NotPresent).
f({
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
}),
// Position read directly from the vertex stream.
f({
e(Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>),
e(Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>),
e(Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>),
e(Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>),
e(Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>),
}),
// Position fetched from the position array through a u8 index.
f({
e(Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>),
e(Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>),
e(Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>),
e(Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>),
e(Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>),
}),
// Position fetched from the position array through a u16 index.
f({
e(Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>),
e(Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>),
e(Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>),
e(Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>),
e(Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>),
}),
};
constexpr u32 s_table_read_position_vertex_size[4][8][2] = {
{
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
},
{
{2, 3},
{2, 3},
{4, 6},
{4, 6},
{8, 12},
},
{
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
},
{
{2, 2},
{2, 2},
{2, 2},
{2, 2},
{2, 2},
},
// Size of a position attribute in the vertex stream (presumably in bytes), looked up as
// s_table_read_position_vertex_size[type][format][elements]. Groups and rows parallel
// s_table_read_position: one g({...}) group per VertexComponentFormat, one e(xy, xyz) row per
// component type in the order u8, s8, u16, s16, float.
//
// Every literal carries the `u` suffix so that each e(...) call is an exact match for the
// e(u32, u32) overload instead of depending on an implicit int -> u32 conversion.
constexpr Table<u32> s_table_read_position_vertex_size = {
    // First group (no reader function): nothing is read.
    g({
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
    }),
    // Direct: values equal sizeof(component type) * component count (2 for XY, 3 for XYZ).
    g({
        e(2u, 3u),
        e(2u, 3u),
        e(4u, 6u),
        e(4u, 6u),
        e(8u, 12u),
    }),
    // Indexed with u8: matches sizeof(u8), regardless of format or component count.
    g({
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
    }),
    // Indexed with u16: matches sizeof(u16), regardless of format or component count.
    g({
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
    }),
};
} // Anonymous namespace
u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)];
return s_table_read_position_vertex_size[type][format][elements];
}
TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type,
ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position[u32(type)][u32(format)][u32(elements)];
return s_table_read_position[type][format][elements];
}

View File

@ -55,8 +55,8 @@ void TexCoord_ReadIndex(VertexLoader* loader)
const auto index = DataRead<I>();
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[CPArray::TexCoord0 + loader->m_tcIndex]));
const auto scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr);
@ -67,140 +67,110 @@ void TexCoord_ReadIndex(VertexLoader* loader)
++loader->m_tcIndex;
}
constexpr TPipelineFunction s_table_read_tex_coord[4][8][2] = {
{
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
},
{
{
TexCoord_ReadDirect<u8, 1>,
TexCoord_ReadDirect<u8, 2>,
},
{
TexCoord_ReadDirect<s8, 1>,
TexCoord_ReadDirect<s8, 2>,
},
{
TexCoord_ReadDirect<u16, 1>,
TexCoord_ReadDirect<u16, 2>,
},
{
TexCoord_ReadDirect<s16, 1>,
TexCoord_ReadDirect<s16, 2>,
},
{
TexCoord_ReadDirect<float, 1>,
TexCoord_ReadDirect<float, 2>,
},
},
{
{
TexCoord_ReadIndex<u8, u8, 1>,
TexCoord_ReadIndex<u8, u8, 2>,
},
{
TexCoord_ReadIndex<u8, s8, 1>,
TexCoord_ReadIndex<u8, s8, 2>,
},
{
TexCoord_ReadIndex<u8, u16, 1>,
TexCoord_ReadIndex<u8, u16, 2>,
},
{
TexCoord_ReadIndex<u8, s16, 1>,
TexCoord_ReadIndex<u8, s16, 2>,
},
{
TexCoord_ReadIndex<u8, float, 1>,
TexCoord_ReadIndex<u8, float, 2>,
},
},
{
{
TexCoord_ReadIndex<u16, u8, 1>,
TexCoord_ReadIndex<u16, u8, 2>,
},
{
TexCoord_ReadIndex<u16, s8, 1>,
TexCoord_ReadIndex<u16, s8, 2>,
},
{
TexCoord_ReadIndex<u16, u16, 1>,
TexCoord_ReadIndex<u16, u16, 2>,
},
{
TexCoord_ReadIndex<u16, s16, 1>,
TexCoord_ReadIndex<u16, s16, 2>,
},
{
TexCoord_ReadIndex<u16, float, 1>,
TexCoord_ReadIndex<u16, float, 2>,
},
},
using Common::EnumMap;
// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
// Builds the innermost row of the texture coordinate reader table: one reader function for
// TexComponentCount::S followed by one for TexComponentCount::ST.
constexpr EnumMap<TPipelineFunction, TexComponentCount::ST> e(TPipelineFunction s,
                                                              TPipelineFunction st)
{
  using Row = EnumMap<TPipelineFunction, TexComponentCount::ST>;
  return Row{s, st};
}
// Builds the innermost row of the texture coordinate size table: the size for
// TexComponentCount::S followed by the size for TexComponentCount::ST.
constexpr EnumMap<u32, TexComponentCount::ST> e(u32 s, u32 st)
{
  using Row = EnumMap<u32, TexComponentCount::ST>;
  return Row{s, st};
}
// Identity helper: returns its argument unchanged. Passing a braced list of e(...) rows through
// f(...) converts it to the per-ComponentFormat map of reader functions, so it can be used as one
// row when initializing the outer (per-VertexComponentFormat) EnumMap.
constexpr EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float>
f(EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float> in)
{
return in;
}
// Identity helper for u32 tables: returns its argument unchanged. Passing a braced list of
// e(...) rows through g(...) converts it to the per-ComponentFormat map of sizes, so it can be
// used as one row when initializing the outer (per-VertexComponentFormat) EnumMap.
constexpr EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float>
g(EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float> in)
{
return in;
}
// Three-level lookup table of T, used as table[type][format][elements]: keyed by
// VertexComponentFormat (up to Index16), then ComponentFormat (up to Float), then
// TexComponentCount (up to ST).
template <typename T>
using Table = EnumMap<EnumMap<EnumMap<T, TexComponentCount::ST>, ComponentFormat::Float>,
VertexComponentFormat::Index16>;
// Reader lookup table, indexed as [VertexComponentFormat][ComponentFormat][TexComponentCount]
// (see the Table alias). Each f({...}) group is one outer row. Inside a group the five e(...)
// lines follow the component types u8, s8, u16, s16, float, and each e(...) lists the
// 1-component reader first and the 2-component reader second.
// NOTE(review): the four groups presumably correspond to "not present", direct, 8-bit index
// and 16-bit index, in VertexComponentFormat enumerator order - confirm against the enum.
constexpr Table<TPipelineFunction> s_table_read_tex_coord = {
// First row: no reader, every entry is nullptr.
f({
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
}),
// Second row: TexCoord_ReadDirect readers.
f({
e(TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>),
e(TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>),
e(TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>),
e(TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>),
e(TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>),
}),
// Third row: TexCoord_ReadIndex readers instantiated with u8 as the first template argument.
f({
e(TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>),
e(TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>),
e(TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>),
e(TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>),
e(TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>),
}),
// Fourth row: TexCoord_ReadIndex readers instantiated with u16 as the first template argument.
f({
e(TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>),
e(TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>),
e(TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>),
e(TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>),
e(TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>),
}),
};
constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = {
{
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
},
{
{1, 2},
{1, 2},
{2, 4},
{2, 4},
{4, 8},
},
{
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
},
{
{2, 2},
{2, 2},
{2, 2},
{2, 2},
{2, 2},
},
// Size lookup table with the same layout as s_table_read_tex_coord:
// [VertexComponentFormat][ComponentFormat][TexComponentCount]. Each g({...}) group is one
// outer row, and each e(a, b) gives the value for one component and for two components.
// NOTE(review): values look like byte counts (second row matches sizeof(component) * count,
// third and fourth rows match a 1-byte and a 2-byte index) - confirm the unit with callers.
constexpr Table<u32> s_table_read_tex_coord_vertex_size = {
// First row: all zero. Written as 0u rather than 0; presumably because a plain 0 is also a
// null pointer constant and would make the call ambiguous with the TPipelineFunction
// overload of e.
g({
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
}),
// Second row: grows with both the component type and the component count.
g({
e(1, 2),
e(1, 2),
e(2, 4),
e(2, 4),
e(4, 8),
}),
// Third row: constant 1 regardless of component type or count.
g({
e(1, 1),
e(1, 1),
e(1, 1),
e(1, 1),
e(1, 1),
}),
// Fourth row: constant 2 regardless of component type or count.
g({
e(2, 2),
e(2, 2),
e(2, 2),
e(2, 2),
e(2, 2),
}),
};
} // Anonymous namespace
// Looks up the size recorded in s_table_read_tex_coord_vertex_size for a texture coordinate
// attribute with the given vertex component format, component format and component count.
u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format,
                                    TexComponentCount elements)
{
  // The table is an EnumMap keyed by the enum types themselves, so it is indexed with the
  // enum values directly; no u32 casts are needed.
  return s_table_read_tex_coord_vertex_size[type][format][elements];
}
// Looks up the reader function recorded in s_table_read_tex_coord for a texture coordinate
// attribute with the given vertex component format, component format and component count.
// The first table row holds only nullptr, so the result can be null.
TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type,
                                                      ComponentFormat format,
                                                      TexComponentCount elements)
{
  // The table is an EnumMap keyed by the enum types themselves, so it is indexed with the
  // enum values directly; no u32 casts are needed.
  return s_table_read_tex_coord[type][format][elements];
}
TPipelineFunction VertexLoader_TextCoord::GetDummyFunction()

View File

@ -10,6 +10,7 @@
#include "Common/BitSet.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
@ -38,8 +39,10 @@
std::unique_ptr<VertexManagerBase> g_vertex_manager;
using OpcodeDecoder::Primitive;
// GX primitive -> RenderState primitive, no primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx{{
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx{
PrimitiveType::Triangles, // GX_DRAW_QUADS
PrimitiveType::Triangles, // GX_DRAW_QUADS_2
PrimitiveType::Triangles, // GX_DRAW_TRIANGLES
@ -48,10 +51,10 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx{{
PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS
}};
};
// GX primitive -> RenderState primitive, using primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx_pr{
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2
PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES
@ -60,7 +63,7 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{
PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS
}};
};
// Due to the BT.601 standard which the GameCube is based on being a compromise
// between PAL and NTSC, neither standard gets square pixels. They are each off
@ -107,13 +110,13 @@ u32 VertexManagerBase::GetRemainingSize() const
return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
}
void VertexManagerBase::AddIndices(int primitive, u32 num_vertices)
void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
m_index_generator.AddIndices(primitive, num_vertices);
}
DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride,
bool cullall)
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
u32 count, u32 stride, bool cullall)
{
// Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently.
g_framebuffer_manager->FlushEFBPokes();
@ -185,7 +188,7 @@ void VertexManagerBase::FlushData(u32 count, u32 stride)
m_cur_buffer_pointer += count * stride;
}
u32 VertexManagerBase::GetRemainingIndices(int primitive) const
u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
{
const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();
@ -193,22 +196,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{
switch (primitive)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS_2:
return index_len / 5 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
return index_len / 4 * 3;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;
case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
return index_len;
default:
@ -219,22 +222,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{
switch (primitive)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS_2:
return index_len / 6 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
return index_len;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;
case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
return index_len;
default:

View File

@ -35,6 +35,11 @@ enum TexelBufferFormat : u32
NUM_TEXEL_BUFFER_FORMATS
};
// Opaque declaration of the primitive enum (fixed underlying type u8) so the declarations in
// this header can name OpcodeDecoder::Primitive; presumably this avoids including the header
// that defines the enumerators.
namespace OpcodeDecoder
{
enum class Primitive : u8;
}  // namespace OpcodeDecoder
class VertexManagerBase
{
private:
@ -93,8 +98,9 @@ public:
virtual bool Initialize();
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(int primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride,
bool cullall);
void FlushData(u32 count, u32 stride);
void Flush();
@ -163,7 +169,7 @@ protected:
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex);
u32 GetRemainingSize() const;
u32 GetRemainingIndices(int primitive) const;
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
void CalculateZSlope(NativeVertexFormat* format);
void LoadTextures();

View File

@ -23,6 +23,7 @@
#include "VideoCommon/FreeLookCamera.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"

View File

@ -13,8 +13,6 @@
#include "Common/EnumFormatter.h"
#include "VideoCommon/CPMemory.h"
class DataReader;
constexpr size_t NUM_XF_COLOR_CHANNELS = 2;
// Lighting
@ -454,10 +452,10 @@ struct XFMemory
u32 unk9[8]; // 0x1048 - 0x104f
PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057
};
static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058);
static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END);
extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address, DataReader src);
void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray);
void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data);
void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size);
void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size);

View File

@ -12,7 +12,6 @@
#include "Core/HW/Memmap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
@ -26,16 +25,10 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
}
static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
static void XFRegWritten(u32 address, u32 value)
{
u32 address = baseAddress;
u32 dataIndex = 0;
while (transferSize > 0 && address < XFMEM_REGISTERS_END)
if (address >= XFMEM_REGISTERS_START && address < XFMEM_REGISTERS_END)
{
u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
u32 nextAddress = address + 1;
switch (address)
{
case XFMEM_ERROR:
@ -44,12 +37,12 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_STATE1: // internal state 1
case XFMEM_CLOCK:
case XFMEM_SETGPMETRIC:
nextAddress = 0x1007;
// Not implemented
break;
case XFMEM_CLIPDISABLE:
{
ClipDisable setting{.hex = newValue};
ClipDisable setting{.hex = value};
if (setting.disable_clipping_detection)
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0);
if (setting.disable_trivial_rejection)
@ -63,7 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
break;
case XFMEM_SETNUMCHAN:
if (xfmem.numChan.numColorChans != (newValue & 3))
if (xfmem.numChan.numColorChans != (value & 3))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;
@ -72,7 +65,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_AMBCOLOR:
{
u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR;
if (xfmem.ambColor[chan] != newValue)
if (xfmem.ambColor[chan] != value)
{
g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan);
@ -84,7 +77,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_MATCOLOR:
{
u8 chan = address - XFMEM_SETCHAN0_MATCOLOR;
if (xfmem.matColor[chan] != newValue)
if (xfmem.matColor[chan] != value)
{
g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan + 2);
@ -96,22 +89,22 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_COLOR:
case XFMEM_SETCHAN0_ALPHA: // Channel Alpha
case XFMEM_SETCHAN1_ALPHA:
if (((u32*)&xfmem)[address] != (newValue & 0x7fff))
if (((u32*)&xfmem)[address] != (value & 0x7fff))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;
case XFMEM_DUALTEX:
if (xfmem.dualTexTrans.enabled != bool(newValue & 1))
if (xfmem.dualTexTrans.enabled != bool(value & 1))
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(-1);
break;
case XFMEM_SETMATRIXINDA:
VertexShaderManager::SetTexMatrixChangedA(newValue);
VertexShaderManager::SetTexMatrixChangedA(value);
break;
case XFMEM_SETMATRIXINDB:
VertexShaderManager::SetTexMatrixChangedB(newValue);
VertexShaderManager::SetTexMatrixChangedB(value);
break;
case XFMEM_SETVIEWPORT:
@ -124,8 +117,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
VertexShaderManager::SetViewportChanged();
PixelShaderManager::SetViewportChanged();
GeometryShaderManager::SetViewportChanged();
nextAddress = XFMEM_SETVIEWPORT + 6;
break;
case XFMEM_SETPROJECTION:
@ -138,12 +129,10 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
g_vertex_manager->Flush();
VertexShaderManager::SetProjectionChanged();
GeometryShaderManager::SetProjectionChanged();
nextAddress = XFMEM_SETPROJECTION + 7;
break;
case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens
if (xfmem.numTexGen.numTexGens != (newValue & 15))
if (xfmem.numTexGen.numTexGens != (value & 15))
g_vertex_manager->Flush();
break;
@ -157,8 +146,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETTEXMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO);
nextAddress = XFMEM_SETTEXMTXINFO + 8;
break;
case XFMEM_SETPOSTMTXINFO:
@ -171,8 +158,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETPOSTMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO);
nextAddress = XFMEM_SETPOSTMTXINFO + 8;
break;
// --------------
@ -189,7 +174,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case 0x104e:
case 0x104f:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, newValue);
DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, value);
break;
case 0x1013:
@ -200,83 +185,69 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
default:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, newValue);
WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, value);
break;
}
int transferred = nextAddress - address;
address = nextAddress;
transferSize -= transferred;
dataIndex += transferred;
}
}
void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src)
void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data)
{
// do not allow writes past registers
if (baseAddress + transferSize > XFMEM_REGISTERS_END)
if (base_address > XFMEM_REGISTERS_END)
{
WARN_LOG_FMT(VIDEO, "XF load exceeds address space: {:x} {} bytes", baseAddress, transferSize);
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
WARN_LOG_FMT(VIDEO, "XF load base address past end of address space: {:x} {} bytes",
base_address, transfer_size);
return;
}
if (baseAddress >= XFMEM_REGISTERS_END)
transferSize = 0;
else
transferSize = XFMEM_REGISTERS_END - baseAddress;
u32 end_address = base_address + transfer_size; // exclusive
// do not allow writes past registers
if (end_address > XFMEM_REGISTERS_END)
{
WARN_LOG_FMT(VIDEO, "XF load ends past end of address space: {:x} {} bytes", base_address,
transfer_size);
end_address = XFMEM_REGISTERS_END;
}
// write to XF mem
if (baseAddress < XFMEM_REGISTERS_START && transferSize > 0)
if (base_address < XFMEM_REGISTERS_START)
{
u32 end = baseAddress + transferSize;
const u32 xf_mem_base = base_address;
u32 xf_mem_transfer_size = transfer_size;
u32 xfMemBase = baseAddress;
u32 xfMemTransferSize = transferSize;
if (end >= XFMEM_REGISTERS_START)
if (end_address > XFMEM_REGISTERS_START)
{
xfMemTransferSize = XFMEM_REGISTERS_START - baseAddress;
baseAddress = XFMEM_REGISTERS_START;
transferSize = end - XFMEM_REGISTERS_START;
}
else
{
transferSize = 0;
xf_mem_transfer_size = XFMEM_REGISTERS_START - base_address;
base_address = XFMEM_REGISTERS_START;
}
XFMemWritten(xfMemTransferSize, xfMemBase);
for (u32 i = 0; i < xfMemTransferSize; i++)
XFMemWritten(xf_mem_transfer_size, xf_mem_base);
for (u32 i = 0; i < xf_mem_transfer_size; i++)
{
((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>();
((u32*)&xfmem)[xf_mem_base + i] = Common::swap32(data);
data += 4;
}
}
// write to XF regs
if (transferSize > 0)
if (base_address >= XFMEM_REGISTERS_START)
{
XFRegWritten(transferSize, baseAddress, src);
for (u32 i = 0; i < transferSize; i++)
for (u32 address = base_address; address < end_address; address++)
{
((u32*)&xfmem)[baseAddress + i] = src.Read<u32>();
const u32 value = Common::swap32(data);
XFRegWritten(address, value);
((u32*)&xfmem)[address] = value;
data += 4;
}
}
}
// Splits a packed indexed-XF-load value into {index, address, size}:
//   index   = bits 31..16
//   address = bits 11..0   (the original author flagged this 12-bit mask as "check mask")
//   size    = bits 15..12, plus one (so the result is in 1..16)
constexpr std::tuple<u32, u32, u32> ExtractIndexedXF(u32 val)
{
  return std::tuple<u32, u32, u32>{val >> 16, val & 0xFFF, ((val >> 12) & 0xF) + 1};
}
// TODO - verify that it is correct. Seems to work, though.
void LoadIndexedXF(u32 val, int refarray)
void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(val);
// load stuff from array to address in xf mem
u32* currData = (u32*)(&xfmem) + address;
@ -287,8 +258,8 @@ void LoadIndexedXF(u32 val, int refarray)
}
else
{
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] +
g_main_cp_state.array_strides[refarray] * index);
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] +
g_main_cp_state.array_strides[array] * index);
}
bool changed = false;
for (u32 i = 0; i < size; ++i)
@ -307,12 +278,10 @@ void LoadIndexedXF(u32 val, int refarray)
}
}
void PreprocessIndexedXF(u32 val, int refarray)
void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(val);
const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] +
g_preprocess_cp_state.array_strides[refarray] * index);
const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] +
g_preprocess_cp_state.array_strides[array] * index);
const size_t buf_size = size * sizeof(u32);
Fifo::PushFifoAuxBuffer(new_data, buf_size);
@ -581,13 +550,9 @@ std::string GetXFMemDescription(u32 address, u32 value)
}
}
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data)
std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
const u8* data)
{
const u32 cmd = Common::swap32(data);
data += 4;
u32 base_address = cmd & 0xFFFF;
const u32 transfer_size = ((cmd >> 16) & 15) + 1;
if (base_address > XFMEM_REGISTERS_END)
{
return std::make_pair("Invalid XF Transfer", "Base address past end of address space");
@ -655,10 +620,9 @@ std::pair<std::string, std::string> GetXFTransferInfo(const u8* data)
return std::make_pair(fmt::to_string(name), fmt::to_string(desc));
}
std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value)
std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(value);
const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size,
address, array, index);
fmt::memory_buffer written;

View File

@ -11,5 +11,7 @@
std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value);
std::string GetXFMemName(u32 address);
std::string GetXFMemDescription(u32 address, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data);
std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
const u8* data);
std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size);

View File

@ -46,6 +46,12 @@ TEST(EnumUtil, Enum1)
EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(3)), "0x3u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(4)), "0x4u /* Invalid */");
EXPECT_EQ(fmt::format("{:n}", Enum1::A), "A");
EXPECT_EQ(fmt::format("{:n}", Enum1::B), "B");
EXPECT_EQ(fmt::format("{:n}", Enum1::C), "C");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(3)), "Invalid (3)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(4)), "Invalid (4)");
}
TEST(EnumUtil, Enum2)
@ -63,4 +69,11 @@ TEST(EnumUtil, Enum2)
EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(4)), "0x4u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(-1)), "0xffffffffu /* Invalid */");
EXPECT_EQ(fmt::format("{:n}", Enum2::D), "D");
EXPECT_EQ(fmt::format("{:n}", Enum2::E), "E");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(2)), "Invalid (2)");
EXPECT_EQ(fmt::format("{:n}", Enum2::F), "F");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(4)), "Invalid (4)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(-1)), "Invalid (-1)");
}

View File

@ -174,8 +174,8 @@ TEST_P(VertexLoaderParamTest, PositionAll)
Input<u8>(i);
else
Input<u16>(i);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size;
VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[CPArray::Position] = elem_count * elem_size;
}
CreateAndCheckSizes(input_size, elem_count * sizeof(float));
for (float value : values)
@ -243,8 +243,8 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float));
Input<u16>(1);
Input<u16>(0);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;)
VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[CPArray::Position] = sizeof(float); // ;)
Input(1.f);
Input(2.f);
Input(3.f);
@ -357,8 +357,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++)
{
VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer();
g_main_cp_state.array_strides[i] = 129;
VertexLoaderManager::cached_arraybases[static_cast<CPArray>(i)] = m_src.GetPointer();
g_main_cp_state.array_strides[static_cast<CPArray>(i)] = 129;
}
// This test is only done 100x in a row since it's ~20x slower using the