GPU: Implement PGXP for lines

This commit is contained in:
Stenzek 2024-12-22 14:06:13 +10:00
parent b81287efd2
commit 0b4e302c22
No known key found for this signature in database
13 changed files with 314 additions and 89 deletions

View File

@ -3131,6 +3131,11 @@ public:
ALWAYS_INLINE GSVector2 zw() const { return GSVector2(vget_high_s32(v4s)); }
/// Concatenates two 2-lane vectors into one 4-lane vector: `l` supplies the low half and `h` the high half.
ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h)
{
  const GSVector4 combined(vcombine_f32(l.v2s, h.v2s));
  return combined;
}
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \
{ \

View File

@ -2316,6 +2316,11 @@ public:
ALWAYS_INLINE GSVector2 zw() const { return GSVector2(z, w); }
/// Concatenates two 2-lane vectors into one 4-lane vector laid out as (l.x, l.y, h.x, h.y).
ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h)
{
  const GSVector4 combined(l.x, l.y, h.x, h.y);
  return combined;
}
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); }

View File

@ -279,7 +279,7 @@ private:
/// Returns the number of vertices in the buffered poly-line.
ALWAYS_INLINE u32 GetPolyLineVertexCount() const
{
return (static_cast<u32>(m_blit_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
return (static_cast<u32>(m_polyline_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
BoolToUInt8(m_render_command.shading_enable);
}
@ -520,20 +520,20 @@ private:
u16 row;
} m_vram_transfer = {};
std::unique_ptr<GPUDump::Recorder> m_gpu_dump;
HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer;
TickCount m_max_run_ahead = 128;
u32 m_fifo_size = 128;
u32 m_blit_remaining_words;
GPURenderCommand m_render_command{};
std::unique_ptr<GPUDump::Recorder> m_gpu_dump;
std::vector<u32> m_blit_buffer;
std::vector<u64> m_polyline_buffer;
ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); }
ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); }
ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); }
TickCount m_max_run_ahead = 128;
u32 m_fifo_size = 128;
private:
using GP0CommandHandler = bool (GPU::*)();
using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;

View File

@ -249,6 +249,16 @@ GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
return cmd;
}
/// Allocates a DrawPreciseLine command through GPUThread::AllocateCommand(), with room for
/// `num_vertices` Vertex entries after the command header, and records the vertex count on it.
GPUBackendDrawPreciseLineCommand* GPUBackend::NewDrawPreciseLineCommand(u32 num_vertices)
{
  using Command = GPUBackendDrawPreciseLineCommand;

  // Header plus the variable-length vertex array that trails it.
  const auto vertex_bytes = num_vertices * sizeof(Command::Vertex);
  const u32 total_bytes = sizeof(Command) + vertex_bytes;

  Command* const new_cmd =
    static_cast<Command*>(GPUThread::AllocateCommand(GPUBackendCommandType::DrawPreciseLine, total_bytes));
  new_cmd->num_vertices = Truncate16(num_vertices);
  return new_cmd;
}
void GPUBackend::PushCommand(GPUThreadCommand* cmd)
{
GPUThread::PushCommand(cmd);
@ -489,6 +499,15 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd)
s_counters.num_primitives += ccmd->num_vertices / 2;
DrawLine(ccmd);
}
break;
case GPUBackendCommandType::DrawPreciseLine:
{
const GPUBackendDrawPreciseLineCommand* ccmd = static_cast<const GPUBackendDrawPreciseLineCommand*>(cmd);
s_counters.num_vertices += ccmd->num_vertices;
s_counters.num_primitives += ccmd->num_vertices / 2;
DrawPreciseLine(ccmd);
}
break;
DefaultCaseIsUnreachable();

View File

@ -51,6 +51,7 @@ public:
static GPUBackendDrawPrecisePolygonCommand* NewDrawPrecisePolygonCommand(u32 num_vertices);
static GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
static GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
static GPUBackendDrawPreciseLineCommand* NewDrawPreciseLineCommand(u32 num_vertices);
static void PushCommand(GPUThreadCommand* cmd);
static void PushCommandAndWakeThread(GPUThreadCommand* cmd);
static void PushCommandAndSync(GPUThreadCommand* cmd, bool spin);
@ -125,6 +126,7 @@ protected:
virtual void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) = 0;
virtual void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) = 0;
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
virtual void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) = 0;
virtual void DrawingAreaChanged() = 0;
virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;

View File

@ -73,7 +73,7 @@ void GPU::TryExecuteCommands()
{
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
u32 terminator_index =
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
m_render_command.shading_enable ? ((static_cast<u32>(m_polyline_buffer.size()) & 1u) ^ 1u) : 0u;
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
@ -86,9 +86,9 @@ void GPU::TryExecuteCommands()
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
if (words_to_copy > 0)
{
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
m_polyline_buffer.reserve(m_polyline_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop());
m_polyline_buffer.push_back(m_fifo.Pop());
}
DEBUG_LOG("Added {} words to polyline", words_to_copy);
@ -98,7 +98,7 @@ void GPU::TryExecuteCommands()
m_fifo.RemoveOne();
DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount());
FinishPolyline();
m_blit_buffer.clear();
m_polyline_buffer.clear();
EndCommand();
continue;
}
@ -713,7 +713,53 @@ bool GPU::HandleRenderLineCommand()
m_fifo.RemoveOne();
PrepareForDraw();
GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(2);
if (g_settings.gpu_pgxp_enable)
{
GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand(2);
FillDrawCommand(cmd, rc);
cmd->palette.bits = 0;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < 2; i++)
{
const u32 color = ((i != 0 && rc.shading_enable) ? FifoPop() : rc.bits) & UINT32_C(0x00FFFFFF);
const u64 maddr_and_pos = m_fifo.Pop();
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
GPUBackendDrawPreciseLineCommand::Vertex* RESTRICT vert = &cmd->vertices[i];
vert->native_x = m_drawing_offset.x + vp.x;
vert->native_y = m_drawing_offset.y + vp.y;
vert->color = color;
valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y,
m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w);
}
if (!(cmd->valid_w = valid_w))
{
for (u32 i = 0; i < 2; i++)
cmd->vertices[i].w = 1.0f;
}
const GSVector2 v0f = GSVector2::load<false>(&cmd->vertices[0].x);
const GSVector2 v1f = GSVector2::load<false>(&cmd->vertices[1].x);
const GSVector4i rect =
GSVector4i(GSVector4(v0f.min(v1f)).upld(GSVector4(v0f.max(v1f)))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
// Drop the line when its bounding box exceeds the maximum primitive size, or when nothing of it is
// left after intersecting with the clamped drawing area. EndCommand() is still called before returning.
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
{
  // Log both endpoints as x,y pairs (the first pair used to print vertices[0].y twice).
  DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
            cmd->vertices[1].x, cmd->vertices[1].y);
  EndCommand();
  return true;
}
AddDrawLineTicks(clamped_rect, rc.shading_enable);
GPUBackend::PushCommand(cmd);
}
else
{
GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand(2);
FillDrawCommand(cmd, rc);
cmd->palette.bits = 0;
@ -758,6 +804,8 @@ bool GPU::HandleRenderLineCommand()
AddDrawLineTicks(clamped_rect, rc.shading_enable);
GPUBackend::PushCommand(cmd);
}
EndCommand();
return true;
}
@ -784,9 +832,9 @@ bool GPU::HandleRenderPolyLineCommand()
const u32 words_to_pop = min_words - 1;
// m_blit_buffer.resize(words_to_pop);
// FifoPopRange(m_blit_buffer.data(), words_to_pop);
m_blit_buffer.reserve(words_to_pop);
m_polyline_buffer.reserve(words_to_pop);
for (u32 i = 0; i < words_to_pop; i++)
m_blit_buffer.push_back(Truncate32(FifoPop()));
m_polyline_buffer.push_back(m_fifo.Pop());
// polyline goes via a different path through the blit buffer
m_blitter_state = BlitterState::DrawingPolyLine;
@ -801,11 +849,73 @@ void GPU::FinishPolyline()
const u32 num_vertices = GetPolyLineVertexCount();
DebugAssert(num_vertices >= 2);
GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2);
if (g_settings.gpu_pgxp_enable)
{
GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand((num_vertices - 1) * 2);
FillDrawCommand(cmd, m_render_command);
cmd->palette.bits = 0;
u32 buffer_pos = 0;
const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]};
u32 out_vertex_count = 0;
const bool shaded = m_render_command.shading_enable;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
GPUBackendDrawPreciseLineCommand::Vertex start, end;
const auto read_vertex = [this, &buffer_pos, &valid_w](GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT dest,
u32 color) {
const u64 maddr_and_pos = m_polyline_buffer[buffer_pos++];
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
dest.native_x = m_drawing_offset.x + vp.x;
dest.native_y = m_drawing_offset.y + vp.y;
dest.color = color;
valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, dest.native_x, dest.native_y,
m_drawing_offset.x, m_drawing_offset.y, &dest.x, &dest.y, &dest.w);
};
read_vertex(start, m_render_command.color_for_first_vertex);
for (u32 i = 1; i < num_vertices; i++)
{
const u32 color =
(shaded ? Truncate32(m_polyline_buffer[buffer_pos++]) : m_render_command.bits) & UINT32_C(0x00FFFFFF);
read_vertex(end, color);
const GSVector2 start_pos = GSVector2::load<false>(&start.x);
const GSVector2 end_pos = GSVector2::load<false>(&end.x);
const GSVector4i rect =
GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
{
DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
}
else
{
AddDrawLineTicks(clamped_rect, m_render_command.shading_enable);
cmd->vertices[out_vertex_count++] = start;
cmd->vertices[out_vertex_count++] = end;
}
start = end;
}
// Only submit the command when at least one segment survived culling.
if (out_vertex_count > 0)
{
  DebugAssert(out_vertex_count <= cmd->num_vertices);

  // Publish the w-validity accumulated by read_vertex(); without this the backend reads a valid_w
  // that this path never wrote. Mirrors the single-line path: when any vertex lacked a usable w,
  // reset w to 1.0 on every emitted vertex.
  // NOTE(review): assumes FillDrawCommand() does not already initialise valid_w - confirm.
  if (!(cmd->valid_w = valid_w))
  {
    for (u32 i = 0; i < out_vertex_count; i++)
      cmd->vertices[i].w = 1.0f;
  }

  // Shrink the count to the vertices actually written; culled segments were skipped.
  cmd->num_vertices = Truncate16(out_vertex_count);
  GPUBackend::PushCommand(cmd);
}
}
else
{
GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2);
FillDrawCommand(cmd, m_render_command);
cmd->palette.bits = 0;
u32 buffer_pos = 0;
const GPUVertexPosition start_vp{Truncate32(m_polyline_buffer[buffer_pos++])};
const GSVector2i draw_offset = GSVector2i::load<false>(&m_drawing_offset.x);
GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset);
u32 start_color = m_render_command.color_for_first_vertex;
@ -814,9 +924,9 @@ void GPU::FinishPolyline()
u32 out_vertex_count = 0;
for (u32 i = 1; i < num_vertices; i++)
{
const u32 end_color =
shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex;
const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
const u32 end_color = shaded ? (Truncate32(m_polyline_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF))) :
m_render_command.color_for_first_vertex;
const GPUVertexPosition vp{Truncate32(m_polyline_buffer[buffer_pos++])};
const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset);
const GSVector4i rect =
@ -850,6 +960,7 @@ void GPU::FinishPolyline()
cmd->num_vertices = Truncate16(out_vertex_count);
GPUBackend::PushCommand(cmd);
}
}
}
bool GPU::HandleFillRectangleCommand()

View File

@ -2500,6 +2500,54 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
}
}
void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
{
PrepareDraw(cmd);
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
SetBatchDepthBuffer(cmd, use_depth);
const u32 num_vertices = cmd->num_vertices;
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
const float depth = GetCurrentNormalizedVertexDepth();
for (u32 i = 0; i < num_vertices; i += 2)
{
const GSVector2 start_pos = GSVector2::load<false>(&cmd->vertices[i].x);
const u32 start_color = cmd->vertices[i].color;
const GSVector2 end_pos = GSVector2::load<false>(&cmd->vertices[i + 1].x);
const u32 end_color = cmd->vertices[i + 1].color;
const GSVector4 bounds = GSVector4::xyxy(start_pos, end_pos);
const GSVector4i rect =
GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty());
AddDrawnRectangle(clamped_rect);
DrawLine(bounds, start_color, end_color, depth);
}
if (ShouldDrawWithSoftwareRenderer())
{
const GPU_SW_Rasterizer::DrawLineFunction DrawFunction =
GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, cmd->transparency_enable);
for (u32 i = 0; i < cmd->num_vertices; i += 2)
{
const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT start = cmd->vertices[i];
const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT end = cmd->vertices[i + 1];
const GPUBackendDrawLineCommand::Vertex vertices[2] = {
{.x = start.native_x, .y = start.native_y, .color = start.color},
{.x = end.native_x, .y = end.native_y, .color = end.color},
};
DrawFunction(cmd, &vertices[0], &vertices[1]);
}
}
}
void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth)
{
DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
@ -3003,6 +3051,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd)
required_indices = MAX_VERTICES_FOR_RECTANGLE;
break;
case GPUBackendCommandType::DrawLine:
case GPUBackendCommandType::DrawPreciseLine:
{
// assume expansion
const GPUBackendDrawLineCommand* lcmd = static_cast<const GPUBackendDrawLineCommand*>(cmd);

View File

@ -89,6 +89,7 @@ protected:
void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override;
void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override;
void FlushRender() override;
void DrawingAreaChanged() override;

View File

@ -150,6 +150,26 @@ void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd)
DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]);
}
void GPU_SW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
{
const GPU_SW_Rasterizer::DrawLineFunction DrawFunction =
GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, cmd->transparency_enable);
// Need to cut out the irrelevant bits.
// TODO: In _theory_ we could use the fixed-point parts here.
for (u32 i = 0; i < cmd->num_vertices; i += 2)
{
const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT start = cmd->vertices[i];
const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT end = cmd->vertices[i + 1];
const GPUBackendDrawLineCommand::Vertex vertices[2] = {
{.x = start.native_x, .y = start.native_y, .color = start.color},
{.x = end.native_x, .y = end.native_y, .color = end.color},
};
DrawFunction(cmd, &vertices[0], &vertices[1]);
}
}
void GPU_SW::DrawingAreaChanged()
{
// GPU_SW_Rasterizer::g_drawing_area set by base class.

View File

@ -36,6 +36,7 @@ protected:
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override;
void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override;
void DrawingAreaChanged() override;
void ClearCache() override;

View File

@ -32,7 +32,7 @@ using DrawTriangleFunction = void (*)(const GPUBackendDrawCommand* cmd, const GP
const GPUBackendDrawPolygonCommand::Vertex* v2);
typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2];
using DrawLineFunction = void (*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
using DrawLineFunction = void (*)(const GPUBackendDrawCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
typedef const DrawLineFunction DrawLineFunctionTable[2][2];

View File

@ -766,8 +766,7 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* RESTRICT cmd)
// TODO: Vectorize line draw.
template<bool shading_enable, bool transparency_enable>
static void DrawLine(const GPUBackendDrawLineCommand* RESTRICT cmd,
const GPUBackendDrawLineCommand::Vertex* RESTRICT p0,
static void DrawLine(const GPUBackendDrawCommand* RESTRICT cmd, const GPUBackendDrawLineCommand::Vertex* RESTRICT p0,
const GPUBackendDrawLineCommand::Vertex* RESTRICT p1)
{
static constexpr u32 XY_SHIFT = 32;

View File

@ -54,6 +54,7 @@ enum class GPUBackendCommandType : u8
DrawPrecisePolygon,
DrawRectangle,
DrawLine,
DrawPreciseLine,
};
struct GPUThreadCommand
@ -317,6 +318,18 @@ struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand
Vertex vertices[0];
};
// Draw command for PGXP ("precise") lines. The vertices are stored directly after the command header;
// consumers read them in (start, end) pairs.
struct GPUBackendDrawPreciseLineCommand : public GPUBackendDrawCommand
{
struct Vertex
{
// Precise position and w, written by CPU::PGXP::GetPreciseVertex(). x and y must stay adjacent and in
// this order: callers load them as a pair via GSVector2::load(&vertex.x).
float x, y, w;
// Integer position: the vertex coordinates from the GPU command plus the drawing offset.
s32 native_x, native_y;
// Vertex colour; producers mask it with 0x00FFFFFF before storing.
u32 color;
};
// Variable-length tail. NewDrawPreciseLineCommand() sizes the allocation for the requested number
// of entries.
Vertex vertices[0];
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif