diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 38186d939..ae724905f 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -231,22 +231,22 @@ void GPU_SW::DispatchRenderCommand() { SWVertex& vert = vertices[i]; const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - vert.color_r = Truncate8(color_rgb); - vert.color_g = Truncate8(color_rgb >> 8); - vert.color_b = Truncate8(color_rgb >> 16); + vert.r = Truncate8(color_rgb); + vert.g = Truncate8(color_rgb >> 8); + vert.b = Truncate8(color_rgb >> 16); const VertexPosition vp{FifoPop()}; - vert.x = vp.x; - vert.y = vp.y; + vert.x = TruncateVertexPosition(m_drawing_offset.x + vp.x); + vert.y = TruncateVertexPosition(m_drawing_offset.y + vp.y); if (textured) { - std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop())); + std::tie(vert.u, vert.v) = UnpackTexcoord(Truncate16(FifoPop())); } else { - vert.texcoord_x = 0; - vert.texcoord_y = 0; + vert.u = 0; + vert.v = 0; } } @@ -324,7 +324,8 @@ void GPU_SW::DispatchRenderCommand() // first vertex SWVertex* p0 = &vertices[0]; SWVertex* p1 = &vertices[1]; - p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())}); + p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())}, + m_drawing_offset.x, m_drawing_offset.y); p0->SetColorRGB24(first_color); // remaining vertices in line strip @@ -334,12 +335,12 @@ void GPU_SW::DispatchRenderCommand() if (rc.polyline) { p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]}); + p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]}, m_drawing_offset.x, m_drawing_offset.y); } else { p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{Truncate32(FifoPop())}); + p1->SetPosition(VertexPosition{Truncate32(FifoPop())}, m_drawing_offset.x, m_drawing_offset.y); } // down here because of the FIFO pops @@ -358,222 +359,6 @@ void GPU_SW::DispatchRenderCommand() } } -enum : u32 -{ - COORD_FRAC_BITS = 32, - COLOR_FRAC_BITS = 12 -}; - -using FixedPointCoord = u64; - -constexpr FixedPointCoord IntToFixedCoord(s32 x) -{ - return (ZeroExtend64(static_cast(x)) << COORD_FRAC_BITS) | (ZeroExtend64(1u) << (COORD_FRAC_BITS - 1)); -} - -using FixedPointColor = u32; - -constexpr FixedPointColor IntToFixedColor(u8 r) -{ - return ZeroExtend32(r) << COLOR_FRAC_BITS | (1u << (COLOR_FRAC_BITS - 1)); -} - -constexpr u8 FixedColorToInt(FixedPointColor r) -{ - return Truncate8(r >> 12); -} - -bool GPU_SW::IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) -{ - const s32 abx = v1->x - v0->x; - const s32 aby = v1->y - v0->y; - const s32 acx = v2->x - v0->x; - const s32 acy = v2->y - v0->y; - return ((abx * acy) - (aby * acx) < 0); -} - -static constexpr bool IsTopLeftEdge(s32 ex, s32 ey) -{ - return (ey < 0 || (ey == 0 && ex < 0)); -} - -static constexpr u8 Interpolate(u8 v0, u8 v1, u8 v2, s32 w0, s32 w1, s32 w2, s32 ws, s32 half_ws) -{ - const s32 v = w0 * static_cast(static_cast(v0)) + w1 * static_cast(static_cast(v1)) + - w2 * static_cast(static_cast(v2)); - const s32 vd = (v + half_ws) / ws; - return (vd < 0) ? 0 : ((vd > 0xFF) ? 0xFF : static_cast(vd)); -} - -template -void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) -{ -#define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax)) - - // ensure the vertices follow a counter-clockwise order - if (IsClockwiseWinding(v0, v1, v2)) - std::swap(v1, v2); - - const s32 px0 = v0->x + m_drawing_offset.x; - const s32 py0 = v0->y + m_drawing_offset.y; - const s32 px1 = v1->x + m_drawing_offset.x; - const s32 py1 = v1->y + m_drawing_offset.y; - const s32 px2 = v2->x + m_drawing_offset.x; - const s32 py2 = v2->y + m_drawing_offset.y; - - // Barycentric coordinates at minX/minY corner - const s32 ws = orient2d(px0, py0, px1, py1, px2, py2); - const s32 half_ws = std::max((ws / 2) - 1, 0); - if (ws == 0) - return; - - // compute bounding box of triangle - s32 min_x = std::min(px0, std::min(px1, px2)); - s32 max_x = std::max(px0, std::max(px1, px2)); - s32 min_y = std::min(py0, std::min(py1, py2)); - s32 max_y = std::max(py0, std::max(py1, py2)); - - // reject triangles which cover the whole vram area - if (static_cast(max_x - min_x) > MAX_PRIMITIVE_WIDTH || static_cast(max_y - min_y) > MAX_PRIMITIVE_HEIGHT) - return; - - // clip to drawing area - min_x = std::clamp(min_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right)); - max_x = std::clamp(max_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right)); - min_y = std::clamp(min_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); - max_y = std::clamp(max_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); - AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, shading_enable, texture_enable, transparency_enable); - - // compute per-pixel increments - const s32 a01 = py0 - py1, b01 = px1 - px0; - const s32 a12 = py1 - py2, b12 = px2 - px1; - const s32 a20 = py2 - py0, b20 = px0 - px2; - - // top-left edge rule - const s32 w0_bias = 0 - s32(IsTopLeftEdge(b12, a12)); - const s32 w1_bias = 0 - s32(IsTopLeftEdge(b20, a20)); - const s32 w2_bias = 0 - s32(IsTopLeftEdge(b01, a01)); - - // compute base barycentric coordinates - s32 w0 = orient2d(px1, py1, px2, py2, min_x, min_y); - s32 w1 = orient2d(px2, py2, px0, py0, min_x, min_y); - s32 w2 = orient2d(px0, py0, px1, py1, min_x, min_y); - - // *exclusive* of max coordinate in PSX - for (s32 y = min_y; y <= max_y; y++) - { - s32 row_w0 = w0; - s32 row_w1 = w1; - s32 row_w2 = w2; - - for (s32 x = min_x; x <= max_x; x++) - { - if (((row_w0 + w0_bias) | (row_w1 + w1_bias) | (row_w2 + w2_bias)) >= 0) - { - const s32 b0 = row_w0; - const s32 b1 = row_w1; - const s32 b2 = row_w2; - - const u8 r = - shading_enable ? Interpolate(v0->color_r, v1->color_r, v2->color_r, b0, b1, b2, ws, half_ws) : v0->color_r; - const u8 g = - shading_enable ? Interpolate(v0->color_g, v1->color_g, v2->color_g, b0, b1, b2, ws, half_ws) : v0->color_g; - const u8 b = - shading_enable ? Interpolate(v0->color_b, v1->color_b, v2->color_b, b0, b1, b2, ws, half_ws) : v0->color_b; - - const u8 texcoord_x = Interpolate(v0->texcoord_x, v1->texcoord_x, v2->texcoord_x, b0, b1, b2, ws, half_ws); - const u8 texcoord_y = Interpolate(v0->texcoord_y, v1->texcoord_y, v2->texcoord_y, b0, b1, b2, ws, half_ws); - - ShadePixel( - static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); - } - - row_w0 += a12; - row_w1 += a20; - row_w2 += a01; - } - - w0 += b12; - w1 += b20; - w2 += b01; - } - -#undef orient2d -} - -GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, - bool raw_texture_enable, bool transparency_enable, - bool dithering_enable) -{ -#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \ - &GPU_SW::DrawTriangle - - static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { - {{{{F(false, false, false, false, false), F(false, false, false, false, true)}, - {F(false, false, false, true, false), F(false, false, false, true, true)}}, - {{F(false, false, true, false, false), F(false, false, true, false, true)}, - {F(false, false, true, true, false), F(false, false, true, true, true)}}}, - {{{F(false, true, false, false, false), F(false, true, false, false, true)}, - {F(false, true, false, true, false), F(false, true, false, true, true)}}, - {{F(false, true, true, false, false), F(false, true, true, false, true)}, - {F(false, true, true, true, false), F(false, true, true, true, true)}}}}, - {{{{F(true, false, false, false, false), F(true, false, false, false, true)}, - {F(true, false, false, true, false), F(true, false, false, true, true)}}, - {{F(true, false, true, false, false), F(true, false, true, false, true)}, - {F(true, false, true, true, false), F(true, false, true, true, true)}}}, - {{{F(true, true, false, false, false), F(true, true, false, false, true)}, - {F(true, true, false, true, false), F(true, true, false, true, true)}}, - {{F(true, true, true, false, false), F(true, true, true, false, true)}, - {F(true, true, true, true, false), F(true, true, true, true, true)}}}}}; - -#undef F - - return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] - [u8(dithering_enable)]; -} - -template -void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, - u8 origin_texcoord_y) -{ - const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x); - const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y); - - { - const u32 clip_left = static_cast(std::clamp(start_x, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = - static_cast(std::clamp(start_x + static_cast(width), m_drawing_area.left, m_drawing_area.right)) + - 1u; - const u32 clip_top = static_cast(std::clamp(start_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(start_y + static_cast(height), m_drawing_area.top, m_drawing_area.bottom)) + - 1u; - AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable); - } - - for (u32 offset_y = 0; offset_y < height; offset_y++) - { - const s32 y = start_y + static_cast(offset_y); - if (y < static_cast(m_drawing_area.top) || y > static_cast(m_drawing_area.bottom)) - continue; - - const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); - - for (u32 offset_x = 0; offset_x < width; offset_x++) - { - const s32 x = start_x + static_cast(offset_x); - if (x < static_cast(m_drawing_area.left) || x > static_cast(m_drawing_area.right)) - continue; - - const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); - - ShadePixel( - static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); - } - } -} - constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT() { DitherLUT lut = {}; @@ -594,7 +379,7 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT() static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT(); template -void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) +void ALWAYS_INLINE_RELEASE GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) { VRAMPixel color; bool transparent; @@ -717,118 +502,570 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex if ((bg_color.bits & mask_and) != 0) return; - if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u)) - return; - SetPixel(static_cast(x), static_cast(y), color.bits | m_GPUSTAT.GetMaskOR()); } -constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k) +template +void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, + u8 origin_texcoord_y) { - s64 delta_fp = static_cast(ZeroExtend64(static_cast(delta)) << 32); - if (delta_fp < 0) - delta_fp -= s64(k - 1); - if (delta_fp > 0) - delta_fp += s64(k - 1); + const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x); + const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y); - return static_cast(delta_fp / k); + { + const u32 clip_left = static_cast(std::clamp(start_x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast(std::clamp(start_x + static_cast(width), m_drawing_area.left, m_drawing_area.right)) + + 1u; + const u32 clip_top = static_cast(std::clamp(start_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast(std::clamp(start_y + static_cast(height), m_drawing_area.top, m_drawing_area.bottom)) + + 1u; + AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable); + } + + for (u32 offset_y = 0; offset_y < height; offset_y++) + { + const s32 y = start_y + static_cast(offset_y); + if (y < static_cast(m_drawing_area.top) || y > static_cast(m_drawing_area.bottom) || + (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u))) + { + continue; + } + + const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); + + for (u32 offset_x = 0; offset_x < width; offset_x++) + { + const s32 x = start_x + static_cast(offset_x); + if (x < static_cast(m_drawing_area.left) || x > static_cast(m_drawing_area.right)) + continue; + + const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); + + ShadePixel( + static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); + } + } } -constexpr s32 FixedToIntCoord(FixedPointCoord x) +std::unique_ptr GPU::CreateSoftwareRenderer() { - return static_cast(Truncate32(x >> COORD_FRAC_BITS)); + return std::make_unique(); } -constexpr FixedPointColor GetLineColorStep(s32 delta, s32 k) +////////////////////////////////////////////////////////////////////////// +// Polygon and line rasterization ported from Mednafen +////////////////////////////////////////////////////////////////////////// + +#define COORD_FBS 12 +#define COORD_MF_INT(n) ((n) << COORD_FBS) +#define COORD_POST_PADDING 12 + +static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x) { - return static_cast(static_cast(delta) << COLOR_FRAC_BITS) / k; + return ((u64)x << 32) + ((1ULL << 32) - (1 << 11)); +} + +static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy) +{ + s64 ret; + s64 dx_ex = (u64)dx << 32; + + if (dx_ex < 0) + dx_ex -= dy - 1; + + if (dx_ex > 0) + dx_ex += dy - 1; + + ret = dx_ex / dy; + + return (ret); +} + +static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp) +{ + return (xfp >> 32); +} + +template +bool ALWAYS_INLINE_RELEASE GPU_SW::CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C) +{ +#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y))) + + s32 denom = CALCIS(x, y); + + if (!denom) + return false; + + if constexpr (shading_enable) + { + idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + } + + if constexpr (texture_enable) + { + idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + } + + return true; + +#undef CALCIS +} + +template +void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) +{ + if constexpr (shading_enable) + { + ig.r += idl.dr_dx * count; + ig.g += idl.dg_dx * count; + ig.b += idl.db_dx * count; + } + + if constexpr (texture_enable) + { + ig.u += idl.du_dx * count; + ig.v += idl.dv_dx * count; + } +} + +template +void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) +{ + if constexpr (shading_enable) + { + ig.r += idl.dr_dy * count; + ig.g += idl.dg_dy * count; + ig.b += idl.db_dy * count; + } + + if constexpr (texture_enable) + { + ig.u += idl.du_dy * count; + ig.v += idl.dv_dy * count; + } +} + +template +void GPU_SW::DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl) +{ + if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u)) + return; + + s32 x_ig_adjust = x_start; + s32 w = x_bound - x_start; + s32 x = TruncateVertexPosition(x_start); + + if (x < static_cast(m_drawing_area.left)) + { + s32 delta = static_cast(m_drawing_area.left) - x; + x_ig_adjust += delta; + x += delta; + w -= delta; + } + + if ((x + w) > (static_cast(m_drawing_area.right) + 1)) + w = static_cast(m_drawing_area.right) + 1 - x; + + if (w <= 0) + return; + + AddIDeltas_DX(ig, idl, x_ig_adjust); + AddIDeltas_DY(ig, idl, y); + + do + { + const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING); + const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING); + const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING); + const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING); + const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING); + + ShadePixel( + static_cast(x), static_cast(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), Truncate8(v)); + + x++; + AddIDeltas_DX(ig, idl); + } while (--w > 0); +} + +template +void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) +{ + u32 core_vertex; + { + u32 cvtemp = 0; + + if (v1->x <= v0->x) + { + if (v2->x <= v1->x) + cvtemp = (1 << 2); + else + cvtemp = (1 << 1); + } + else if (v2->x < v0->x) + cvtemp = (1 << 2); + else + cvtemp = (1 << 0); + + if (v2->y < v1->y) + { + std::swap(v2, v1); + cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); + } + + if (v1->y < v0->y) + { + std::swap(v1, v0); + cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4); + } + + if (v2->y < v1->y) + { + std::swap(v2, v1); + cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); + } + + core_vertex = cvtemp >> 1; + } + + if (v0->y == v2->y) + return; + + if (static_cast(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT) + { + return; + } + + { + const s32 min_x = std::clamp(std::min(v0->x, std::min(v1->x, v2->x)), static_cast(m_drawing_area.left), + static_cast(m_drawing_area.right)); + const s32 max_x = std::clamp(std::max(v0->x, std::max(v1->x, v2->x)), static_cast(m_drawing_area.left), + static_cast(m_drawing_area.right)); + const s32 min_y = std::clamp(std::min(v0->y, std::min(v1->y, v2->y)), static_cast(m_drawing_area.top), + static_cast(m_drawing_area.bottom)); + const s32 max_y = std::clamp(std::max(v0->y, std::max(v1->y, v2->y)), static_cast(m_drawing_area.top), + static_cast(m_drawing_area.bottom)); + AddDrawTriangleTicks(static_cast(max_x - min_x + 1), static_cast(max_y - min_y + 1), shading_enable, + texture_enable, transparency_enable); + } + + s64 base_coord = MakePolyXFP(v0->x); + s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y)); + s64 bound_coord_us; + s64 bound_coord_ls; + bool right_facing; + + if (v1->y == v0->y) + { + bound_coord_us = 0; + right_facing = (bool)(v1->x > v0->x); + } + else + { + bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y)); + right_facing = (bool)(bound_coord_us > base_step); + } + + if (v2->y == v1->y) + bound_coord_ls = 0; + else + bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y)); + + i_deltas idl; + if (!CalcIDeltas(idl, v0, v1, v2)) + return; + + const SWVertex* vertices[3] = {v0, v1, v2}; + + i_group ig; + if constexpr (texture_enable) + { + ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + } + + ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + + AddIDeltas_DX(ig, idl, -vertices[core_vertex]->x); + AddIDeltas_DY(ig, idl, -vertices[core_vertex]->y); + + struct TriangleHalf + { + u64 x_coord[2]; + u64 x_step[2]; + + s32 y_coord; + s32 y_bound; + + bool dec_mode; + } tripart[2]; + + u32 vo = 0; + u32 vp = 0; + if (core_vertex != 0) + vo = 1; + if (core_vertex == 2) + vp = 3; + + { + TriangleHalf* tp = &tripart[vo]; + tp->y_coord = vertices[0 ^ vo]->y; + tp->y_bound = vertices[1 ^ vo]->y; + tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x); + tp->x_step[right_facing] = bound_coord_us; + tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step); + tp->x_step[!right_facing] = base_step; + tp->dec_mode = vo; + } + + { + TriangleHalf* tp = &tripart[vo ^ 1]; + tp->y_coord = vertices[1 ^ vp]->y; + tp->y_bound = vertices[2 ^ vp]->y; + tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x); + tp->x_step[right_facing] = bound_coord_ls; + tp->x_coord[!right_facing] = + base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) * + base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step); + tp->x_step[!right_facing] = base_step; + tp->dec_mode = vp; + } + + for (u32 i = 0; i < 2; i++) + { + s32 yi = tripart[i].y_coord; + s32 yb = tripart[i].y_bound; + + u64 lc = tripart[i].x_coord[0]; + u64 ls = tripart[i].x_step[0]; + + u64 rc = tripart[i].x_coord[1]; + u64 rs = tripart[i].x_step[1]; + + if (tripart[i].dec_mode) + { + while (yi > yb) + { + yi--; + lc -= ls; + rc -= rs; + + s32 y = TruncateVertexPosition(yi); + + if (y < static_cast(m_drawing_area.top)) + break; + + if (y > static_cast(m_drawing_area.bottom)) + continue; + + DrawSpan( + yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + } + } + else + { + while (yi < yb) + { + s32 y = TruncateVertexPosition(yi); + + if (y > static_cast(m_drawing_area.bottom)) + break; + + if (y >= static_cast(m_drawing_area.top)) + { + + DrawSpan( + yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + } + + yi++; + lc += ls; + rc += rs; + } + } + } +} + +GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, + bool raw_texture_enable, bool transparency_enable, + bool dithering_enable) +{ +#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \ + &GPU_SW::DrawTriangle + + static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { + {{{{F(false, false, false, false, false), F(false, false, false, false, true)}, + {F(false, false, false, true, false), F(false, false, false, true, true)}}, + {{F(false, false, true, false, false), F(false, false, true, false, true)}, + {F(false, false, true, true, false), F(false, false, true, true, true)}}}, + {{{F(false, true, false, false, false), F(false, true, false, false, true)}, + {F(false, true, false, true, false), F(false, true, false, true, true)}}, + {{F(false, true, true, false, false), F(false, true, true, false, true)}, + {F(false, true, true, true, false), F(false, true, true, true, true)}}}}, + {{{{F(true, false, false, false, false), F(true, false, false, false, true)}, + {F(true, false, false, true, false), F(true, false, false, true, true)}}, + {{F(true, false, true, false, false), F(true, false, true, false, true)}, + {F(true, false, true, true, false), F(true, false, true, true, true)}}}, + {{{F(true, true, false, false, false), F(true, true, false, false, true)}, + {F(true, true, false, true, false), F(true, true, false, true, true)}}, + {{F(true, true, true, false, false), F(true, true, true, false, true)}, + {F(true, true, true, true, false), F(true, true, true, true, true)}}}}}; + +#undef F + + return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] + [u8(dithering_enable)]; +} + +enum +{ + Line_XY_FractBits = 32 +}; +enum +{ + Line_RGB_FractBits = 12 +}; + +struct line_fxp_coord +{ + u64 x, y; + u32 r, g, b; +}; + +struct line_fxp_step +{ + s64 dx_dk, dy_dk; + s32 dr_dk, dg_dk, db_dk; +}; + +static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk) +{ + delta = (u64)delta << Line_XY_FractBits; + + if (delta < 0) + delta -= dk - 1; + if (delta > 0) + delta += dk - 1; + + return (delta / dk); } template void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) { - // Algorithm based on Mednafen. - if (p0->x > p1->x) - std::swap(p0, p1); - - const s32 dx = p1->x - p0->x; - const s32 dy = p1->y - p0->y; - const s32 k = std::max(std::abs(dx), std::abs(dy)); + const s32 i_dx = std::abs(p1->x - p0->x); + const s32 i_dy = std::abs(p1->y - p0->y); + const s32 k = (i_dx > i_dy) ? i_dx : i_dy; + if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT) + return; { // TODO: Move to base class - const s32 min_x = std::min(p0->x, p1->x); - const s32 max_x = std::max(p0->x, p1->x); - const s32 min_y = std::min(p0->y, p1->y); - const s32 max_y = std::max(p0->y, p1->y); - - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); - const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + const u32 clip_left = + static_cast(std::clamp(std::min(p0->x, p1->x), m_drawing_area.left, m_drawing_area.left)); + const u32 clip_right = + static_cast(std::clamp(std::max(p0->x, p1->x), m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = + static_cast(std::clamp(std::min(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast(std::clamp(std::max(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)) + 1u; AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable); } - FixedPointCoord step_x, step_y; - FixedPointColor step_r, step_g, step_b; - if (k > 0) + if (p0->x >= p1->x && k > 0) + std::swap(p0, p1); + + line_fxp_step step; + if (k == 0) { - step_x = GetLineCoordStep(dx, k); - step_y = GetLineCoordStep(dy, k); + step.dx_dk = 0; + step.dy_dk = 0; if constexpr (shading_enable) { - step_r = GetLineColorStep(s32(ZeroExtend32(p1->color_r)) - s32(ZeroExtend32(p0->color_r)), k); - step_g = GetLineColorStep(s32(ZeroExtend32(p1->color_g)) - s32(ZeroExtend32(p0->color_g)), k); - step_b = GetLineColorStep(s32(ZeroExtend32(p1->color_b)) - s32(ZeroExtend32(p0->color_b)), k); - } - else - { - step_r = 0; - step_g = 0; - step_b = 0; + step.dr_dk = 0; + step.dg_dk = 0; + step.db_dk = 0; } } else { - step_x = 0; - step_y = 0; - step_r = 0; - step_g = 0; - step_b = 0; - } - - FixedPointCoord current_x = IntToFixedCoord(p0->x); - FixedPointCoord current_y = IntToFixedCoord(p0->y); - FixedPointColor current_r = IntToFixedColor(p0->color_r); - FixedPointColor current_g = IntToFixedColor(p0->color_g); - FixedPointColor current_b = IntToFixedColor(p0->color_b); - - for (s32 i = 0; i <= k; i++) - { - const s32 x = m_drawing_offset.x + FixedToIntCoord(current_x); - const s32 y = m_drawing_offset.y + FixedToIntCoord(current_y); - - const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->color_r; - const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->color_g; - const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->color_b; - - if (x >= static_cast(m_drawing_area.left) && x <= static_cast(m_drawing_area.right) && - y >= static_cast(m_drawing_area.top) && y <= static_cast(m_drawing_area.bottom)) - { - ShadePixel(static_cast(x), static_cast(y), r, g, b, - 0, 0); - } - - current_x += step_x; - current_y += step_y; + step.dx_dk = LineDivide(p1->x - p0->x, k); + step.dy_dk = LineDivide(p1->y - p0->y, k); if constexpr (shading_enable) { - current_r += step_r; - current_g += step_g; - current_b += step_b; + step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k; + step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k; + step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k; + } + } + + line_fxp_coord cur_point; + cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); + cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); + + cur_point.x -= 1024; + + if (step.dy_dk < 0) + cur_point.y -= 1024; + + if constexpr (shading_enable) + { + cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + } + + for (s32 i = 0; i <= k; i++) + { + // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain. + const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047; + const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047; + + if (!IsInterlacedRenderingEnabled() || GetActiveLineLSB() != (static_cast(y) & 1u)) + { + const u8 r = shading_enable ? static_cast(cur_point.r >> Line_RGB_FractBits) : p0->r; + const u8 g = shading_enable ? static_cast(cur_point.g >> Line_RGB_FractBits) : p0->g; + const u8 b = shading_enable ? static_cast(cur_point.b >> Line_RGB_FractBits) : p0->b; + + if (x >= static_cast(m_drawing_area.left) && x <= static_cast(m_drawing_area.right) && + y >= static_cast(m_drawing_area.top) && y <= static_cast(m_drawing_area.bottom)) + { + ShadePixel(static_cast(x), static_cast(y), r, g, + b, 0, 0); + } + } + + cur_point.x += step.dx_dk; + cur_point.y += step.dy_dk; + + if constexpr (shading_enable) + { + cur_point.r += step.dr_dk; + cur_point.g += step.dg_dk; + cur_point.b += step.db_dk; } } } @@ -860,8 +1097,3 @@ GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enab return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; } - -std::unique_ptr GPU::CreateSoftwareRenderer() -{ - return std::make_unique(); -} diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index a9113e2fb..08f2e242a 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -31,17 +31,17 @@ protected: struct SWVertex { s32 x, y; - u8 color_r, color_g, color_b; - u8 texcoord_x, texcoord_y; + u8 r, g, b; + u8 u, v; - ALWAYS_INLINE void SetPosition(VertexPosition p) + ALWAYS_INLINE void SetPosition(VertexPosition p, s32 offset_x, s32 offset_y) { - x = p.x; - y = p.y; + x = TruncateVertexPosition(offset_x + p.x); + y = TruncateVertexPosition(offset_y + p.y); } - ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(color_r, color_g, color_b) = UnpackColorRGB24(color); } - ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(texcoord_x, texcoord_y) = UnpackTexcoord(value); } + ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(r, g, b) = UnpackColorRGB24(color); } + ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(u, v) = UnpackTexcoord(value); } }; ////////////////////////////////////////////////////////////////////////// @@ -60,19 +60,9 @@ protected: void DispatchRenderCommand() override; - static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); - template void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y); - template - void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); - - using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); - DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, - bool transparency_enable, bool dithering_enable); - template void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, u8 origin_texcoord_y); @@ -82,6 +72,45 @@ protected: DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable); + ////////////////////////////////////////////////////////////////////////// + // Polygon and line rasterization ported from Mednafen + ////////////////////////////////////////////////////////////////////////// + struct i_deltas + { + u32 du_dx, dv_dx; + u32 dr_dx, dg_dx, db_dx; + + u32 du_dy, dv_dy; + u32 dr_dy, dg_dy, db_dy; + }; + + struct i_group + { + u32 u, v; + u32 r, g, b; + }; + + template + bool CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C); + + template + void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1); + + template + void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1); + + template + void DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl); + + template + void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); + + using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); + DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, + bool transparency_enable, bool dithering_enable); + template void DrawLine(const SWVertex* p0, const SWVertex* p1);