From 0b4e302c22d6efcc1c8806711941610854e8bb91 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 22 Dec 2024 14:06:13 +1000 Subject: [PATCH] GPU: Implement PGXP for lines --- src/common/gsvector_neon.h | 5 + src/common/gsvector_nosimd.h | 5 + src/core/gpu.h | 14 +- src/core/gpu_backend.cpp | 19 +++ src/core/gpu_backend.h | 2 + src/core/gpu_commands.cpp | 269 +++++++++++++++++++++++---------- src/core/gpu_hw.cpp | 49 ++++++ src/core/gpu_hw.h | 1 + src/core/gpu_sw.cpp | 20 +++ src/core/gpu_sw.h | 1 + src/core/gpu_sw_rasterizer.h | 2 +- src/core/gpu_sw_rasterizer.inl | 3 +- src/core/gpu_thread_commands.h | 13 ++ 13 files changed, 314 insertions(+), 89 deletions(-) diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index dd52f8fc9..8c6122210 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -3131,6 +3131,11 @@ public: ALWAYS_INLINE GSVector2 zw() const { return GSVector2(vget_high_s32(v4s)); } + ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h) + { + return GSVector4(vcombine_f32(l.v2s, h.v2s)); + } + #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \ { \ diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index 71df7f28b..d40b80f03 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -2316,6 +2316,11 @@ public: ALWAYS_INLINE GSVector2 zw() const { return GSVector2(z, w); } + ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h) + { + return GSVector4(l.x, l.y, h.x, h.y); + } + #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); } diff --git a/src/core/gpu.h b/src/core/gpu.h index 6fd7778aa..344236c25 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -279,7 +279,7 @@ private: /// Returns the number of vertices in the buffered poly-line. ALWAYS_INLINE u32 GetPolyLineVertexCount() const { - return (static_cast(m_blit_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >> + return (static_cast(m_polyline_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >> BoolToUInt8(m_render_command.shading_enable); } @@ -520,20 +520,20 @@ private: u16 row; } m_vram_transfer = {}; + std::unique_ptr m_gpu_dump; + HeapFIFOQueue m_fifo; - std::vector m_blit_buffer; + TickCount m_max_run_ahead = 128; + u32 m_fifo_size = 128; u32 m_blit_remaining_words; GPURenderCommand m_render_command{}; - - std::unique_ptr m_gpu_dump; + std::vector m_blit_buffer; + std::vector m_polyline_buffer; ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); } ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); } ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); } - TickCount m_max_run_ahead = 128; - u32 m_fifo_size = 128; - private: using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index ca3bae9fe..871e344a3 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -249,6 +249,16 @@ GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) return cmd; } +GPUBackendDrawPreciseLineCommand* GPUBackend::NewDrawPreciseLineCommand(u32 num_vertices) +{ + const u32 size = + sizeof(GPUBackendDrawPreciseLineCommand) + (num_vertices * sizeof(GPUBackendDrawPreciseLineCommand::Vertex)); + GPUBackendDrawPreciseLineCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::DrawPreciseLine, size)); + cmd->num_vertices = Truncate16(num_vertices); + return cmd; +} + void GPUBackend::PushCommand(GPUThreadCommand* cmd) { GPUThread::PushCommand(cmd); @@ -489,6 +499,15 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) s_counters.num_primitives += ccmd->num_vertices / 2; DrawLine(ccmd); } + break; + + case GPUBackendCommandType::DrawPreciseLine: + { + const GPUBackendDrawPreciseLineCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives += ccmd->num_vertices / 2; + DrawPreciseLine(ccmd); + } break; DefaultCaseIsUnreachable(); diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index 2808cce4f..3053c9490 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -51,6 +51,7 @@ public: static GPUBackendDrawPrecisePolygonCommand* NewDrawPrecisePolygonCommand(u32 num_vertices); static GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); static GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + static GPUBackendDrawPreciseLineCommand* NewDrawPreciseLineCommand(u32 num_vertices); static void PushCommand(GPUThreadCommand* cmd); static void PushCommandAndWakeThread(GPUThreadCommand* cmd); static void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); @@ -125,6 +126,7 @@ protected: virtual void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) = 0; virtual void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) = 0; virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; + virtual void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) = 0; virtual void DrawingAreaChanged() = 0; virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 5ee6d540e..a57f6c421 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -73,7 +73,7 @@ void GPU::TryExecuteCommands() { const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1; u32 terminator_index = - m_render_command.shading_enable ? ((static_cast(m_blit_buffer.size()) & 1u) ^ 1u) : 0u; + m_render_command.shading_enable ? ((static_cast(m_polyline_buffer.size()) & 1u) ^ 1u) : 0u; for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex) { // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. @@ -86,9 +86,9 @@ void GPU::TryExecuteCommands() const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize()); if (words_to_copy > 0) { - m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy); + m_polyline_buffer.reserve(m_polyline_buffer.size() + words_to_copy); for (u32 i = 0; i < words_to_copy; i++) - m_blit_buffer.push_back(FifoPop()); + m_polyline_buffer.push_back(m_fifo.Pop()); } DEBUG_LOG("Added {} words to polyline", words_to_copy); @@ -98,7 +98,7 @@ void GPU::TryExecuteCommands() m_fifo.RemoveOne(); DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount()); FinishPolyline(); - m_blit_buffer.clear(); + m_polyline_buffer.clear(); EndCommand(); continue; } @@ -713,51 +713,99 @@ bool GPU::HandleRenderLineCommand() m_fifo.RemoveOne(); PrepareForDraw(); - GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - cmd->palette.bits = 0; - if (rc.shading_enable) + if (g_settings.gpu_pgxp_enable) { - cmd->vertices[0].color = rc.color_for_first_vertex; - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; - cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + bool valid_w = g_settings.gpu_pgxp_texture_correction; + for (u32 i = 0; i < 2; i++) + { + const u32 color = ((i != 0 && rc.shading_enable) ? FifoPop() : rc.bits) & UINT32_C(0x00FFFFFF); + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + GPUBackendDrawPreciseLineCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; + vert->native_x = m_drawing_offset.x + vp.x; + vert->native_y = m_drawing_offset.y + vp.y; + vert->color = color; + + valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, + m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); + } + if (!(cmd->valid_w = valid_w)) + { + for (u32 i = 0; i < 2; i++) + cmd->vertices[i].w = 1.0f; + } + + const GSVector2 v0f = GSVector2::load(&cmd->vertices[0].x); + const GSVector2 v1f = GSVector2::load(&cmd->vertices[1].x); + const GSVector4i rect = + GSVector4i(GSVector4(v0f.min(v1f)).upld(GSVector4(v0f.max(v1f)))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + EndCommand(); + return true; + } + + AddDrawLineTicks(clamped_rect, rc.shading_enable); + GPUBackend::PushCommand(cmd); } else { - cmd->vertices[0].color = rc.color_for_first_vertex; - cmd->vertices[1].color = rc.color_for_first_vertex; + GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + if (rc.shading_enable) + { + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + else + { + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + + const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); + const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + EndCommand(); + return true; + } + + AddDrawLineTicks(clamped_rect, rc.shading_enable); + GPUBackend::PushCommand(cmd); } - const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); - const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); - const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y); - EndCommand(); - return true; - } - - AddDrawLineTicks(clamped_rect, rc.shading_enable); - GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -784,9 +832,9 @@ bool GPU::HandleRenderPolyLineCommand() const u32 words_to_pop = min_words - 1; // m_blit_buffer.resize(words_to_pop); // FifoPopRange(m_blit_buffer.data(), words_to_pop); - m_blit_buffer.reserve(words_to_pop); + m_polyline_buffer.reserve(words_to_pop); for (u32 i = 0; i < words_to_pop; i++) - m_blit_buffer.push_back(Truncate32(FifoPop())); + m_polyline_buffer.push_back(m_fifo.Pop()); // polyline goes via a different path through the blit buffer m_blitter_state = BlitterState::DrawingPolyLine; @@ -801,54 +849,117 @@ void GPU::FinishPolyline() const u32 num_vertices = GetPolyLineVertexCount(); DebugAssert(num_vertices >= 2); - GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2); - FillDrawCommand(cmd, m_render_command); - - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); - GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); - u32 start_color = m_render_command.color_for_first_vertex; - - const bool shaded = m_render_command.shading_enable; - u32 out_vertex_count = 0; - for (u32 i = 1; i < num_vertices; i++) + if (g_settings.gpu_pgxp_enable) { - const u32 end_color = - shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); + GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand((num_vertices - 1) * 2); + FillDrawCommand(cmd, m_render_command); + cmd->palette.bits = 0; - const GSVector4i rect = - GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + u32 buffer_pos = 0; + u32 out_vertex_count = 0; + const bool shaded = m_render_command.shading_enable; + bool valid_w = g_settings.gpu_pgxp_texture_correction; + GPUBackendDrawPreciseLineCommand::Vertex start, end; - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + const auto read_vertex = [this, &buffer_pos, &valid_w](GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT dest, + u32 color) { + const u64 maddr_and_pos = m_polyline_buffer[buffer_pos++]; + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + dest.native_x = m_drawing_offset.x + vp.x; + dest.native_y = m_drawing_offset.y + vp.y; + dest.color = color; + valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, dest.native_x, dest.native_y, + m_drawing_offset.x, m_drawing_offset.y, &dest.x, &dest.y, &dest.w); + }; + + read_vertex(start, m_render_command.color_for_first_vertex); + + for (u32 i = 1; i < num_vertices; i++) { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); - } - else - { - AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + const u32 color = + (shaded ? Truncate32(m_polyline_buffer[buffer_pos++]) : m_render_command.bits) & UINT32_C(0x00FFFFFF); + read_vertex(end, color); - GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; - out_vertex_count += 2; + const GSVector2 start_pos = GSVector2::load(&start.x); + const GSVector2 end_pos = GSVector2::load(&end.x); + const GSVector4i rect = + GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - GSVector2i::store(&out_vertex[0].x, start_pos); - out_vertex[0].color = start_color; - GSVector2i::store(&out_vertex[1].x, end_pos); - out_vertex[1].color = end_color; + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); + } + else + { + AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + + cmd->vertices[out_vertex_count++] = start; + cmd->vertices[out_vertex_count++] = end; + } + + start = end; } - start_pos = end_pos; - start_color = end_color; + if (out_vertex_count > 0) + { + DebugAssert(out_vertex_count <= cmd->num_vertices); + cmd->num_vertices = Truncate16(out_vertex_count); + GPUBackend::PushCommand(cmd); + } } - - if (out_vertex_count > 0) + else { - DebugAssert(out_vertex_count <= cmd->num_vertices); - cmd->num_vertices = Truncate16(out_vertex_count); - GPUBackend::PushCommand(cmd); + GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2); + FillDrawCommand(cmd, m_render_command); + cmd->palette.bits = 0; + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{Truncate32(m_polyline_buffer[buffer_pos++])}; + const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); + GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); + u32 start_color = m_render_command.color_for_first_vertex; + + const bool shaded = m_render_command.shading_enable; + u32 out_vertex_count = 0; + for (u32 i = 1; i < num_vertices; i++) + { + const u32 end_color = shaded ? (Truncate32(m_polyline_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF))) : + m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{Truncate32(m_polyline_buffer[buffer_pos++])}; + const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); + + const GSVector4i rect = + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); + } + else + { + AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + + GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; + out_vertex_count += 2; + + GSVector2i::store(&out_vertex[0].x, start_pos); + out_vertex[0].color = start_color; + GSVector2i::store(&out_vertex[1].x, end_pos); + out_vertex[1].color = end_color; + } + + start_pos = end_pos; + start_color = end_color; + } + + if (out_vertex_count > 0) + { + DebugAssert(out_vertex_count <= cmd->num_vertices); + cmd->num_vertices = Truncate16(out_vertex_count); + GPUBackend::PushCommand(cmd); + } } } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 56af8ede0..d276861c5 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2500,6 +2500,54 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) } } +void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) +{ + PrepareDraw(cmd); + + const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; + SetBatchDepthBuffer(cmd, use_depth); + + const u32 num_vertices = cmd->num_vertices; + DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + + const float depth = GetCurrentNormalizedVertexDepth(); + + for (u32 i = 0; i < num_vertices; i += 2) + { + const GSVector2 start_pos = GSVector2::load(&cmd->vertices[i].x); + const u32 start_color = cmd->vertices[i].color; + const GSVector2 end_pos = GSVector2::load(&cmd->vertices[i + 1].x); + const u32 end_color = cmd->vertices[i + 1].color; + + const GSVector4 bounds = GSVector4::xyxy(start_pos, end_pos); + const GSVector4i rect = + GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()); + + AddDrawnRectangle(clamped_rect); + DrawLine(bounds, start_color, end_color, depth); + } + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, cmd->transparency_enable); + + for (u32 i = 0; i < cmd->num_vertices; i += 2) + { + const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT start = cmd->vertices[i]; + const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT end = cmd->vertices[i + 1]; + const GPUBackendDrawLineCommand::Vertex vertices[2] = { + {.x = start.native_x, .y = start.native_y, .color = start.color}, + {.x = end.native_x, .y = end.native_y, .color = end.color}, + }; + + DrawFunction(cmd, &vertices[0], &vertices[1]); + } + } +} + void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) { DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); @@ -3003,6 +3051,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd) required_indices = MAX_VERTICES_FOR_RECTANGLE; break; case GPUBackendCommandType::DrawLine: + case GPUBackendCommandType::DrawPreciseLine: { // assume expansion const GPUBackendDrawLineCommand* lcmd = static_cast(cmd); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 70606999e..54366fc12 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -89,6 +89,7 @@ protected: void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override; void FlushRender() override; void DrawingAreaChanged() override; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 7f33cb423..ba493132e 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -150,6 +150,26 @@ void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd) DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]); } +void GPU_SW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, cmd->transparency_enable); + + // Need to cut out the irrelevant bits. + // TODO: In _theory_ we could use the fixed-point parts here. + for (u32 i = 0; i < cmd->num_vertices; i += 2) + { + const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT start = cmd->vertices[i]; + const GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT end = cmd->vertices[i + 1]; + const GPUBackendDrawLineCommand::Vertex vertices[2] = { + {.x = start.native_x, .y = start.native_y, .color = start.color}, + {.x = end.native_x, .y = end.native_y, .color = end.color}, + }; + + DrawFunction(cmd, &vertices[0], &vertices[1]); + } +} + void GPU_SW::DrawingAreaChanged() { // GPU_SW_Rasterizer::g_drawing_area set by base class. diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index b26c4616e..6e945195d 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -36,6 +36,7 @@ protected: void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override; void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; void DrawingAreaChanged() override; void ClearCache() override; diff --git a/src/core/gpu_sw_rasterizer.h b/src/core/gpu_sw_rasterizer.h index 94b1c81c7..4e46dcc25 100644 --- a/src/core/gpu_sw_rasterizer.h +++ b/src/core/gpu_sw_rasterizer.h @@ -32,7 +32,7 @@ using DrawTriangleFunction = void (*)(const GPUBackendDrawCommand* cmd, const GP const GPUBackendDrawPolygonCommand::Vertex* v2); typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2]; -using DrawLineFunction = void (*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, +using DrawLineFunction = void (*)(const GPUBackendDrawCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1); typedef const DrawLineFunction DrawLineFunctionTable[2][2]; diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index 67d0e34f8..805276d81 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -766,8 +766,7 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* RESTRICT cmd) // TODO: Vectorize line draw. template -static void DrawLine(const GPUBackendDrawLineCommand* RESTRICT cmd, - const GPUBackendDrawLineCommand::Vertex* RESTRICT p0, +static void DrawLine(const GPUBackendDrawCommand* RESTRICT cmd, const GPUBackendDrawLineCommand::Vertex* RESTRICT p0, const GPUBackendDrawLineCommand::Vertex* RESTRICT p1) { static constexpr u32 XY_SHIFT = 32; diff --git a/src/core/gpu_thread_commands.h b/src/core/gpu_thread_commands.h index e09fd979c..9fda31e14 100644 --- a/src/core/gpu_thread_commands.h +++ b/src/core/gpu_thread_commands.h @@ -54,6 +54,7 @@ enum class GPUBackendCommandType : u8 DrawPrecisePolygon, DrawRectangle, DrawLine, + DrawPreciseLine, }; struct GPUThreadCommand @@ -317,6 +318,18 @@ struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand Vertex vertices[0]; }; +struct GPUBackendDrawPreciseLineCommand : public GPUBackendDrawCommand +{ + struct Vertex + { + float x, y, w; + s32 native_x, native_y; + u32 color; + }; + + Vertex vertices[0]; +}; + #ifdef _MSC_VER #pragma warning(pop) #endif