From ca161224ebe42e383f33b92d9ad9844660c31cae Mon Sep 17 00:00:00 2001 From: TJnotJT Date: Tue, 14 Jan 2025 20:00:49 -0500 Subject: [PATCH] GS: UV calculation using triangle edge rasterization. --- common/vsprops/common.props | 6 +- pcsx2-gsrunner/Main.cpp | 28 + pcsx2/GS/GSDrawingContext.cpp | 18 +- pcsx2/GS/GSState.cpp | 872 +++++++++++++++++- pcsx2/GS/GSState.h | 73 ++ .../GS/Renderers/Common/GSVertexTraceFMM.cpp | 8 + pcsx2/GS/Renderers/SW/GSDrawScanline.cpp | 92 ++ pcsx2/GS/Renderers/SW/GSRasterizer.cpp | 32 + pcsx2/GS/Renderers/SW/GSRendererSW.cpp | 73 +- pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h | 8 + pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp | 3 +- pcsx2/debug.h | 2 + pcsx2/pcsx2.vcxproj | 3 +- pcsx2/pcsx2.vcxproj.filters | 61 +- 14 files changed, 1231 insertions(+), 48 deletions(-) create mode 100644 pcsx2/debug.h diff --git a/common/vsprops/common.props b/common/vsprops/common.props index d97da47b8d..9b0a030d19 100644 --- a/common/vsprops/common.props +++ b/common/vsprops/common.props @@ -52,10 +52,10 @@ _M_X86;__SSE4_1__;%(PreprocessorDefinitions) - NotSet - AdvancedVectorExtensions2 + NotSet + AdvancedVectorExtensions2 + -march=nehalem %(AdditionalOptions) - -march=haswell %(AdditionalOptions) -march=armv8.4-a %(AdditionalOptions) %(AdditionalOptions) /Zc:externConstexpr /Zc:__cplusplus /Zo /utf-8 diff --git a/pcsx2-gsrunner/Main.cpp b/pcsx2-gsrunner/Main.cpp index d3ad1e8182..7b9dc3b887 100644 --- a/pcsx2-gsrunner/Main.cpp +++ b/pcsx2-gsrunner/Main.cpp @@ -45,6 +45,12 @@ #include "pcsx2/VMManager.h" #include "svnrev.h" +#include "debug.h" +#if MY_DEBUG == 1 + +#include +extern bool savePoints; +#endif namespace GSRunner { @@ -141,6 +147,20 @@ bool GSRunner::InitializeConfig() si.SetStringValue("MemoryCards", fmt::format("Slot{}_Filename", i + 1).c_str(), ""); } +#if MY_DEBUG == 1 + if (false) + { + si.SetBoolValue("EmuCore/GS", "dump", true); + si.SetIntValue("EmuCore/GS", "saven", 0); + si.SetIntValue("EmuCore/GS", "savel", 100); + 
si.SetBoolValue("EmuCore/GS", "save", true); + si.SetBoolValue("EmuCore/GS", "savef", true); + si.SetBoolValue("EmuCore/GS", "savet", true); + si.SetBoolValue("EmuCore/GS", "savez", true); + si.SetStringValue("EmuCore/GS", "HWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug"); + si.SetStringValue("EmuCore/GS", "SWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug"); + } +#endif VMManager::Internal::LoadStartupSettings(); return true; } @@ -857,8 +877,16 @@ LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) return DefWindowProcW(hwnd, msg, wParam, lParam); } +#if MY_DEBUG == 1 +extern void dumpRanges(); +#endif + int wmain(int argc, wchar_t** argv) { +#if MY_DEBUG == 1 + if (savePoints) + atexit(dumpRanges); +#endif std::vector u8_args; u8_args.reserve(static_cast(argc)); for (int i = 0; i < argc; i++) diff --git a/pcsx2/GS/GSDrawingContext.cpp b/pcsx2/GS/GSDrawingContext.cpp index 71ca866682..5edd692d84 100644 --- a/pcsx2/GS/GSDrawingContext.cpp +++ b/pcsx2/GS/GSDrawingContext.cpp @@ -5,7 +5,11 @@ #include "GS/GSGL.h" #include "GS/GS.h" #include "GS/GSUtil.h" +#include "GS/GSState.h" + +// FIXME: RENAME THIS FUNCTION AND CHANGE ARGS NAMES TO BE NICER! +// MAKE SURE BEING CALLED WITH ARGS IN THE RIGHT ORDER! static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv) { // return max possible texcoord. @@ -38,10 +42,8 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv) { // REGION_REPEAT adhears to the original texture size, even if offset outside the texture (with MAXUV). minuv &= limit; - if (tl < 0) - uv = minuv | maxuv; // wrap around, just use (any & mask) | fix. - else - uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask). 
+ int ignore; + GSState::UsesRegionRepeat(maxuv, minuv, tl, br, &ignore, &uv); } return uv; @@ -130,18 +132,18 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, if (tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less { - tw = reduce(uv.x, tw); - th = reduce(uv.y, th); + tw = reduce(uv.x + 1, tw); + th = reduce(uv.y + 1, th); } if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT) { - tw = extend(uv.x, tw); + tw = extend(uv.x + 1, tw); } if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT) { - th = extend(uv.y, th); + th = extend(uv.y + 1, th); } GIFRegTEX0 res = TEX0; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 83068f15db..f90a0362a3 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -18,10 +18,774 @@ #include #include +#include "debug.h" + +#define WALK_FULL_EDGE 1 + int GSState::s_n = 0; int GSState::s_last_transfer_draw_n = 0; int GSState::s_transfer_n = 0; +#if MY_DEBUG == 1 +bool savePoints = false; +int s_n_debug = -1; +int s_n_exit = -1; +int primID = 0; +int* primIDSW = 0; +std::map> pointsHackRange; +std::map> pointsSWRange; +std::vector> pointsHackDebug; +std::vector> pointsSWDebug; +std::map, std::tuple> pointsHackDebugOrig; +std::map, std::tuple> pointsSWDebugOrig; + +void sortPoints(std::vector>& v) +{ + std::sort(v.begin(), v.end(), [](const std::tuple& a, const std::tuple& b) { + int ai[5] = {std::get<0>(a), std::get<2>(a), std::get<1>(a), std::get<3>(a), std::get<4>(a)}; // prim, y, x, u, v + int bi[5] = {std::get<0>(b), std::get<2>(b), std::get<1>(b), std::get<3>(b), std::get<4>(b)}; // prim, y, x, u, v + for (int i = 0; i < 5; i++) + { + if (ai[i] < bi[i]) + return true; + if (ai[i] > bi[i]) + return false; + } + return false; + }); +} + +void dumpRanges() +{ + FILE* file = fopen("C:\\Users\\tchan\\Desktop\\log_files\\pointsHackRange.txt", "w"); + for (auto it = pointsHackRange.begin(); it != 
pointsHackRange.end(); it++) + { + auto [x, y, u, v] = it->second; + fprintf(file, "%d,%d,%d,%d,%d\n", it->first, x, y, u, v); + } + fclose(file); + + char fileName[1024]; + sprintf(&fileName[0], "C:\\Users\\tchan\\Desktop\\log_files\\pointsHackDebug_%d.txt", s_n_debug); + file = fopen(fileName, "w"); + sortPoints(pointsHackDebug); + for (auto it = pointsHackDebug.begin(); it != pointsHackDebug.end(); it++) + { + auto [n, x, y, u, v] = *it; + fprintf(file, "%d,%d,%d,%d,%d\n", n, x, y, u, v); + } + fclose(file); + + sprintf(&fileName[0], "C:\\Users\\tchan\\Desktop\\log_files\\pointsHackDebugOrig_%d.txt", s_n_debug); + file = fopen(fileName, "w"); + for (auto it = pointsHackDebugOrig.begin(); it != pointsHackDebugOrig.end(); it++) + { + auto [prim_id, vert_id] = it->first; + auto [x, y, u, v] = it->second; + fprintf(file, "%d,%d,%f,%f,%f,%f Hack\n", prim_id, vert_id, x, y, u, v); + } + fclose(file); + + file = fopen("C:\\Users\\tchan\\Desktop\\log_files\\pointsSWRange.txt", "w"); + for (auto it = pointsSWRange.begin(); it != pointsSWRange.end(); it++) + { + auto [x, y, u, v] = it->second; + fprintf(file, "%d,%d,%d,%d,%d\n", it->first, x, y, u, v); + } + fclose(file); + + sprintf(&fileName[0], "C:\\Users\\tchan\\Desktop\\log_files\\pointsSWDebug_%d.txt", s_n_debug); + file = fopen(fileName, "w"); + for (auto it = pointsSWDebug.begin(); it != pointsSWDebug.end(); it++) + { + auto [n, x, y, u, v] = *it; + fprintf(file, "%d,%d,%d,%d,%d\n", n, x, y, u, v); + } + fclose(file); + + sprintf(&fileName[0], "C:\\Users\\tchan\\Desktop\\log_files\\pointsSWDebugOrig_%d.txt", s_n_debug); + file = fopen(fileName, "w"); + sortPoints(pointsSWDebug); + for (auto it = pointsSWDebugOrig.begin(); it != pointsSWDebugOrig.end(); it++) + { + auto [prim_id, vert_id] = it->first; + auto [x, y, u, v] = it->second; + fprintf(file, "%d,%d,%f,%f,%f,%f SW\n", prim_id, vert_id, x, y, u, v); + } + fclose(file); + + // Compare + for (auto it = pointsHackRange.begin(); it != pointsHackRange.end(); 
it++) + { + if (pointsSWRange.contains(it->first)) + { + auto [x1, y1, u1, v1] = it->second; + auto [x2, y2, u2, v2] = pointsSWRange[it->first]; + if (x1 != x2 || y1 != y2 || u1 != u2 || v1 != v2) + { + char c[1024]; + sprintf(c, "Wrong range: %d; %d %d %d %d; %d %d %d %d;", it->first, x1, y1, u1, v1, x2, y2, u2, v2); + //throw std::exception(c); + } + } + } +} +#endif + +__forceinline GSState::EdgeFunction GSState::GetEdgeFunction(const Point& a, const Point& b) +{ + return {a.y - b.y, b.x - a.x, a.x * b.y - a.y * b.x}; +} + +__forceinline bool GSState::CheckEdgeFunction(double e, EdgeType edgeType) +{ + if (edgeType == EdgeType::TOP || edgeType == EdgeType::LEFT) + { + return e >= 0; + } + else if (edgeType == EdgeType::RIGHT || edgeType == EdgeType::BOTTOM) + { + return e > 0; + } + else + { + pxFail("Invalid edge type"); + } +} + +__forceinline std::tuple GSState::InterpolateEdgeFunctionsUV( + double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2) +{ + return {(e12 * p0.U + e20 * p1.U + e01 * p2.U) / (e01 + e12 + e20), (e12 * p0.V + e20 * p1.V + e01 * p2.V) / (e01 + e12 + e20)}; +} + +__forceinline std::tuple GSState::InterpolateEdgeFunctionsSTQ( + double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2) +{ + double S = (e12 * p0.S + e20 * p1.S + e01 * p2.S) / (e01 + e12 + e20); + double T = (e12 * p0.T + e20 * p1.T + e01 * p2.T) / (e01 + e12 + e20); + double Q = (e12 * p0.Q + e20 * p1.Q + e01 * p2.Q) / (e01 + e12 + e20); + return {S, T, Q}; +} + +__forceinline bool GSState::CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger) +{ + if (dxBigger) + { + return minX <= x && x <= maxX; + } else if (dyBigger) + { + return minY <= y && y <= maxY; + } else + { + return (minX <= x && x <= maxX) || (minY <= y && y <= maxY); + } +} + +// Order points by y (ascending) then x (ascending) +// Make sure conforms to rule where interior of triangle is on the right 
+// of each directed edge.
+// The vertices are first put into the winding for which the cross product (p1 - p0) x (p2 - p0) is
+// non-negative, then rotated (winding preserved) so that the vertex with the smallest y, ties broken
+// by the smallest x, comes first.
+__forceinline auto GSState::SortPoints(Point p0, Point p1, Point p2) -> std::tuple<Point, Point, Point>
+{
+	// A negative cross product means the opposite winding: exchange the last two vertices.
+	const auto cross = (p1.x - p0.x) * (p2.y - p0.y) - (p1.y - p0.y) * (p2.x - p0.x);
+	if (cross < 0)
+	{
+		std::swap(p1, p2);
+	}
+
+	// Locate the first vertex in (y, x) order.
+	Point p[3] = {p0, p1, p2};
+	int i0 = 0;
+	for (int i = 1; i < 3; ++i)
+	{
+		if (p[i].y < p[i0].y || (p[i].y == p[i0].y && p[i].x < p[i0].x))
+		{
+			i0 = i;
+		}
+	}
+
+	// Rotate rather than fully sort, so the winding chosen above is kept.
+	return {p[i0], p[(i0 + 1) % 3], p[(i0 + 2) % 3]};
+}
+
+// Re-order the vertices with SortPoints() and label the directed edges p0->p1, p1->p2 and p2->p0:
+//  - p0-p1 is a top edge when both ends have the same y, otherwise a right edge;
+//  - p1-p2 is a right edge when y increases along it, a left edge when y decreases along it,
+//    and a bottom edge when it is horizontal;
+//  - p2-p0 is always labelled a left edge.
+// Returns {p0, p1, p2, type of p0-p1, type of p1-p2, type of p2-p0}.
+__forceinline auto GSState::SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2)
+	-> std::tuple<Point, Point, Point, EdgeType, EdgeType, EdgeType>
+{
+	std::tie(p0, p1, p2) = SortPoints(p0, p1, p2);
+
+	const EdgeType t01 = (p0.y == p1.y) ? EdgeType::TOP : EdgeType::RIGHT;
+
+	EdgeType t12 = EdgeType::BOTTOM;
+	if (p1.y < p2.y)
+		t12 = EdgeType::RIGHT;
+	else if (p1.y > p2.y)
+		t12 = EdgeType::LEFT;
+
+	const EdgeType t20 = EdgeType::LEFT;
+	return {p0, p1, p2, t01, t12, t20};
+}
+
+// True when (x, y) lies inside the scissor rectangle; all four bounds are inclusive.
+__forceinline bool GSState::CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1)
+{
+	return (SCAX0 <= x && x <= SCAX1) && (SCAY0 <= y && y <= SCAY1);
+}
+
+// Calculate the final UV coordinates of the rasterized points by performing wrapping/clamping.
+// Then adjust the min/max UV values accordingly.
+// TODO: We can remove this function. Not used.
+// Returns {U, V} after the initial [-2047, 2047] clamp followed by the per-axis mode selected by
+// clamp.WMS (for U, texture width W) and clamp.WMT (for V, texture height H).
+std::tuple<int, int> GSState::CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp)
+{
+	// Initial clamping done on all UVs
+	U = std::max(-2047, std::min(2047, U));
+	V = std::max(-2047, std::min(2047, V));
+
+	// Clamping/wrapping for U
+	// In REGION_REPEAT mode the MINU/MAXU fields are reused as the mask/fix pair.
+	const int MINU = static_cast<int>(clamp.MINU);
+	const int MAXU = static_cast<int>(clamp.MAXU);
+	const int MSKU = MINU, FIXU = MAXU;
+	switch (clamp.WMS) // U clamping/wrapping mode
+	{
+		case CLAMP_REPEAT:
+			U &= W - 1; // W is a power of 2
+			break;
+		case CLAMP_CLAMP:
+			U = std::max(0, std::min(W - 1, U));
+			break;
+		case CLAMP_REGION_CLAMP:
+			U = std::max(MINU, std::min(MAXU, U));
+			break;
+		case CLAMP_REGION_REPEAT:
+			U = (U & MSKU) | FIXU;
+			break;
+		default:
+			ASSUME(0); // same handling as the V switch below
+	}
+
+	// Clamping/wrapping for V
+	// In REGION_REPEAT mode the MINV/MAXV fields are reused as the mask/fix pair.
+	const int MINV = static_cast<int>(clamp.MINV);
+	const int MAXV = static_cast<int>(clamp.MAXV);
+	const int MSKV = MINV, FIXV = MAXV;
+	switch (clamp.WMT) // V clamping/wrapping mode
+	{
+		case CLAMP_REPEAT:
+			V &= H - 1; // H is a power of 2
+			break;
+		case CLAMP_CLAMP:
+			V = std::max(0, std::min(H - 1, V));
+			break;
+		case CLAMP_REGION_CLAMP:
+			V = std::max(MINV, std::min(MAXV, V));
+			break;
+		case CLAMP_REGION_REPEAT:
+			V = (V & MSKV) | FIXV;
+			break;
+		default:
+			ASSUME(0);
+	}
+
+	return {U, V};
+}
+
+#if MY_DEBUG == 1
+bool g_switchOrient = false;
+#endif
+
+// Interpolates the texture coordinate at one sample point and widens [minU, maxU] x [minV, maxV]
+// so that it contains the texel(s) touched there.
+// e01, e12, e20: edge function values at the sample point, used as interpolation weights.
+// p0, p1, p2:    triangle vertices carrying U/V (FST) or S/T/Q (non-FST).
+// W, H:          texture width/height, used to scale S/Q and T/Q when FST is false.
+// bilinear:      when true the range floor(U - 0.5) .. floor(U + 0.5) is added, otherwise only floor(U)
+//                (same for V).
+// t0, t1, t2 and clamp are not used here; no wrapping/clamping mode is applied in this function.
+void GSState::CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2,
+	const Point& p0, const Point& p1, const Point& p2,
+	int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV)
+{
+	double U, V;
+	if (FST)
+	{
+		std::tie(U, V) = InterpolateEdgeFunctionsUV(e01, e12, e20, p0, p1, p2);
+	}
+	else
+	{
+		auto [S, T, Q] = InterpolateEdgeFunctionsSTQ(e01, e12, e20, p0, p1, p2);
+		U = W * (S / Q);
+		V = H * (T / Q);
+	}
+
+	// Clamp to valid UV range
+	// NOTE: keep the max(min(...)) form with the constant first. With this argument order a NaN
+	// (e.g. from Q == 0) comes out as 2047.0, whereas std::clamp would pass the NaN on to the int casts.
+	U = std::max(-2047.0, std::min(2047.0, U));
+	V = std::max(-2047.0, std::min(2047.0, V));
+
+	// Lowest/highest texel index touched at this sample (identical when not bilinear).
+	const double reach = bilinear ? 0.5 : 0.0;
+	const int loU = static_cast<int>(std::floor(U - reach));
+	const int loV = static_cast<int>(std::floor(V - reach));
+	const int hiU = static_cast<int>(std::floor(U + reach));
+	const int hiV = static_cast<int>(std::floor(V + reach));
+
+#if MY_DEBUG == 1
+	if (GSState::s_n == s_n_debug)
+	{
+		int x = static_cast<int>((e01 * p2.x + e12 * p0.x + e20 * p1.x) / (e01 + e12 + e20));
+		int y = static_cast<int>((e01 * p2.y + e12 * p0.y + e20 * p1.y) / (e01 + e12 + e20));
+		if (g_switchOrient)
+			y = -y;
+		pointsHackDebug.push_back({primID, x, y, loU, loV});
+		if (bilinear)
+		{
+			pointsHackDebug.push_back({primID, x, y, loU, hiV});
+			pointsHackDebug.push_back({primID, x, y, hiU, loV});
+			pointsHackDebug.push_back({primID, x, y, hiU, hiV});
+		}
+	}
+#endif
+
+	minU = std::min(loU, minU);
+	minV = std::min(loV, minV);
+	maxU = std::max(hiU, maxU);
+	maxV = std::max(hiV, maxV);
+}
+
+// Test if any of the 4 scissor corners will be rasterized.
+// Helps with getting the proper min/max when the triangle is scissored +void GSState::CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear, + const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV) +{ + int SCAX0 = static_cast(scissor.SCAX0); + int SCAY0 = static_cast(scissor.SCAY0); + int SCAX1 = static_cast(scissor.SCAX1); + int SCAY1 = static_cast(scissor.SCAY1); + + // Get edge function coefficients + EdgeFunction E01 = GetEdgeFunction(p0, p1); + EdgeFunction E12 = GetEdgeFunction(p1, p2); + EdgeFunction E20 = GetEdgeFunction(p2, p0); + + for (int x : {SCAX0, SCAX1}) + { + for (int y : {SCAY0, SCAY1}) + { + double e01 = E01.a * x + E01.b * y + E01.c; + double e12 = E12.a * x + E12.b * y + E12.c; + double e20 = E20.a * x + E20.b * y + E20.c; + if (CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20)) + { + CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, minV, maxU, maxV); + } + } + } +} + +void GSState::EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2, + EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear, + const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV) +{ + int SCAX0 = static_cast(scissor.SCAX0); + int SCAY0 = static_cast(scissor.SCAY0); + int SCAX1 = static_cast(scissor.SCAX1); + int SCAY1 = static_cast(scissor.SCAY1); + +#if MY_DEBUG == 1 + g_switchOrient = switchOrient; +#endif + // To traverse in the opposite direction we swap the first and second points + // and flip the whole triangle vertically (this is so that right-hand interior rule can still be followed). + // This only affects XY so doesn't matter for computing UV ranges. 
+ if (switchOrient) + { + std::swap(p0, p1); + std::swap(t12, t20); + p0.y = -p0.y; + p1.y = -p1.y; + p2.y = -p2.y; + std::tie(SCAY0, SCAY1) = std::tuple(-SCAY1, -SCAY0); + }; + + // Get edge function coefficients + EdgeFunction E01 = GetEdgeFunction(p0, p1); + EdgeFunction E12 = GetEdgeFunction(p1, p2); + EdgeFunction E20 = GetEdgeFunction(p2, p0); + + // Initialize deltas and steps + double dx = p1.x - p0.x; + double dy = p1.y - p0.y; + int sx = (dx > 0) ? 1.0 : -1.0; + int sy = (dy > 0) ? 1.0 : -1.0; + + // Initialize starting point by rounding correctly + // Use right-hand-interior rule and brute force case analysis to get the correct starting point + // FIXME: THIS IS WRONG!!! THERE CAN REALLY BE 3 CANDIDDATE POINTS + int x, y; + if (dy == 0.0) + { + if (dx > 0.0) + { + x = static_cast(std::floor(p0.x)); + y = static_cast(std::ceil(p0.y)); + sy = 1; + } + else + { + x = static_cast(std::ceil(p0.x)); + y = static_cast(std::floor(p0.y)); + sy = -1; + } + } + else if (dx == 0.0) + { + if (dy > 0.0) + { + y = static_cast(std::floor(p0.y)); + x = static_cast(std::floor(p0.x)); + sx = -1; + } + else + { + y = static_cast(std::ceil(p0.y)); + x = static_cast(std::ceil(p0.x)); + sx = 1; + } + } + else if ((dx > 0.0) && (dy > 0.0)) + { + if (E01.a * std::ceil(p0.x) + E01.b * std::ceil(p0.y) + E01.c >= 0.0) + { + x = static_cast(std::ceil(p0.x)); + y = static_cast(std::ceil(p0.y)); + } + else if (E01.a * std::floor(p0.x) + E01.b * std::ceil(p0.y) + E01.c >= 0.0) + { + x = static_cast(std::floor(p0.x)); + y = static_cast(std::ceil(p0.y)); + } + else + { + // Should be unreachable + pxFail("Invalid edge function"); + } + } + else if ((dx < 0.0) && (dy > 0.0)) + { + if (E01.a * std::floor(p0.x) + E01.b * std::ceil(p0.y) + E01.c >= 0) + { + x = static_cast(std::floor(p0.x)); + y = static_cast(std::ceil(p0.y)); + } + else if (E01.a * std::floor(p0.x) + E01.b * std::floor(p0.y) + E01.c >= 0) + { + x = static_cast(std::floor(p0.x)); + y = static_cast(std::floor(p0.y)); + } + 
else + { + // Should be unreachable + pxFail("Invalid edge function"); + } + } + else if ((dx < 0.0) && (dy < 0.0)) + { + if (E01.a * std::floor(p0.x) + E01.b * std::floor(p0.y) + E01.c >= 0) + { + x = static_cast(std::floor(p0.x)); + y = static_cast(std::floor(p0.y)); + } + else if (E01.a * std::ceil(p0.x) + E01.b * std::floor(p0.y) + E01.c >= 0) + { + x = static_cast(std::ceil(p0.x)); + y = static_cast(std::floor(p0.y)); + } + else + { + // Should be unreachable + pxFail("Invalid edge function"); + } + } + else if ((dx > 0) && (dy < 0)) + { + if (E01.a * std::ceil(p0.x) + E01.b * std::floor(p0.y) + E01.c >= 0) + { + x = static_cast(std::ceil(p0.x)); + y = static_cast(std::floor(p0.y)); + } + else if (E01.a * std::ceil(p0.x) + E01.b * std::ceil(p0.y) + E01.c >= 0) + { + x = static_cast(std::ceil(p0.x)); + y = static_cast(std::ceil(p0.y)); + } + else + { + pxFail("Invalid edge function"); + } + } + + // Initialize edge function values + double e01 = E01.a * x + E01.b * y + E01.c; + double e12 = E12.a * x + E12.b * y + E12.c; + double e20 = E20.a * x + E20.b * y + E20.c; + + // Bounding box of the edge + const int minX = static_cast(std::floor(std::min(p0.x, p1.x))); + const int minY = static_cast(std::floor(std::min(p0.y, p1.y))); + const int maxX = static_cast(std::ceil(std::max(p0.x, p1.x))); + const int maxY = static_cast(std::ceil(std::max(p0.y, p1.y))); + bool dxBigger = std::abs(dx) > std::abs(dy); + bool dyBigger = std::abs(dy) > std::abs(dx); + + // Walk along the longest axis + while (CheckXYBounds(x, y, minX, minY, maxX, maxY, dxBigger, dyBigger)) // Check if the current point is inside the bounds of the line + { + // Update min/max UV values if the current point is inside the triangle and XY is inside the scissor + if (CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20) && CheckScissor(x, y, SCAX0, SCAY0, SCAX1, SCAY1)) + { + CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, 
minV, maxU, maxV); + + // FIXME: TEST WITH dx == 0 and dy == 0 optimization for + // horiz and vert edges! + if (!WALK_FULL_EDGE) + // if (!WALK_FULL_EDGE || dx == 0 || dy == 0) + { + // Break after the first rasterizable point. If a horizontal or vertical edge, this should be sufficient + // since we walk along the edge in both directions. + // Otherwise, this might not always be the "most outside" point but should be much faster. + break; + } + } + // Test the horizontal and vertical edge functions + double e01x = e01 + E01.a * sx; + double e01y = e01 + E01.b * sy; + if (!CheckEdgeFunction(e01x, t01)) + { + // Cannot go horizontally, so go vertically + y += sy; + e01 += E01.b * sy; + e12 += E12.b * sy; + e20 += E20.b * sy; + } + else if (!CheckEdgeFunction(e01y, t01)) + { + // Cannot go vertically, so go horizontally + x += sx; + e01 += E01.a * sx; + e12 += E12.a * sx; + e20 += E20.a * sx; + } + else if (e01x < e01y) + { + // Can go both, but horizontal is closer to the line + x += sx; + e01 += E01.a * sx; + e12 += E12.a * sx; + e20 += E20.a * sx; + } + else if (e01x > e01y) + { + // Can go both, but diagonal is closer to the line + y += sy; + e01 += E01.b * sy; + e12 += E12.b * sy; + e20 += E20.b * sy; + } + else + { + // Should be unreachable + pxFail("Invalid edge function"); + } + } +} + +void GSState::EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp, + int& minU, int& minV, int& maxU, int& maxV) +{ + // Rearrange the points in the correct order and walk along all edges + auto [v0_, v1_, v2_, t01, t12, t20] = SortAndClassifyTriangleVerts(p0, p1, p2); + + // Check if the corners of the scissor region are inside the triangle + CheckScissorUV(v0_, v1_, v2_, t01, t12, t20, W, H, FST, bilinear, scissor, clamp, minU, minV, maxU, maxV); + + // Walk along edges in the clockwise direction (if Y-axis points down) + EdgeWalkTriangleMinMaxUVImpl(v0_, v1_, v2_, t01, t12, t20, W, H, FST, 
bilinear, scissor, clamp, false, minU, minV, maxU, maxV); + EdgeWalkTriangleMinMaxUVImpl(v1_, v2_, v0_, t12, t20, t01, W, H, FST, bilinear, scissor, clamp, false, minU, minV, maxU, maxV); + EdgeWalkTriangleMinMaxUVImpl(v2_, v0_, v1_, t20, t01, t12, W, H, FST, bilinear, scissor, clamp, false, minU, minV, maxU, maxV); + + // Walk along edges in the anti-clockwise direction (if Y-axis points down) + EdgeWalkTriangleMinMaxUVImpl(v0_, v1_, v2_, t01, t12, t20, W, H, FST, bilinear, scissor, clamp, true, minU, minV, maxU, maxV); + EdgeWalkTriangleMinMaxUVImpl(v1_, v2_, v0_, t12, t20, t01, W, H, FST, bilinear, scissor, clamp, true, minU, minV, maxU, maxV); + EdgeWalkTriangleMinMaxUVImpl(v2_, v0_, v1_, t20, t01, t12, W, H, FST, bilinear, scissor, clamp, true, minU, minV, maxU, maxV); +} + +// Get the minimum and maximum UV coordinates of all triangles in the current vertex buffer +// This should only be called if all vertices in the buffer are complete triangles +// W and H are the texture width and height +// minU, minV, maxU, maxV are the current min/max UV values and the output +// bilinear is whether or not to use bilinear interpolation (if false, nearest neighbor is used) +void GSState::GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const +{ + // Get XY offset values to get XY in window coordinates + int OFX = static_cast(m_context->XYOFFSET.OFX); + int OFY = static_cast(m_context->XYOFFSET.OFY); + + pxAssert((m_index.tail % 3) == 0); // should be a multiple of 3 for triangles + + // Initialize min/max UV values + minU = std::numeric_limits::max(); + minV = std::numeric_limits::max(); + maxU = std::numeric_limits::min(); + maxV = std::numeric_limits::min(); + + // Iterate through each triangle and get min/max UVs + for (size_t tri_i = 0; tri_i < m_index.tail / 3; tri_i++) + { +#if MY_DEBUG == 1 + primID = tri_i; + if (s_n == 58 && tri_i == 1094) + { + printf("\n"); + } +#endif + Point verts[3]; + for (int vert_i = 0; vert_i 
< 3; vert_i++) + { + Point p; + const int xi = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].XYZ.X); + const int yi = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].XYZ.Y); + p.x = static_cast(xi - OFX) / 16.0; + p.y = static_cast(yi - OFY) / 16.0; + if (m_draw_env->PRIM.FST) + { + p.U = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].U) / 16.0; + p.V = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].V) / 16.0; + } + else + { + p.S = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].ST.S); + p.T = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].ST.T); + p.Q = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].RGBAQ.Q); + } + verts[vert_i] = p; + } + +#if MY_DEBUG == 1 + if (s_n == s_n_debug) + { + pointsHackDebugOrig[{tri_i, 0}] = {verts[0].x, verts[0].y, verts[0].S, verts[0].T}; + pointsHackDebugOrig[{tri_i, 1}] = {verts[1].x, verts[1].y, verts[1].S, verts[1].T}; + pointsHackDebugOrig[{tri_i, 2}] = {verts[2].x, verts[2].y, verts[2].S, verts[2].T}; + } +#endif + EdgeWalkTriangleMinMaxUV(verts[0], verts[1], verts[2], W, H, m_draw_env->PRIM.FST, bilinear, m_context->SCISSOR, m_context->CLAMP, minU, minV, maxU, maxV); + } + + GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU); + GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV); +} + +// Get the minimum and maximum UV coordinates of all sprites in the current vertex buffer +// This should only be called if all vertices in the buffer are complete sprites +// W and H are the texture width and height +// minU, minV, maxU, maxV are the current min/max UV values and the output +// bilinear is whether or not to use bilinear interpolation (if false, nearest neighbor is used) +void GSState::GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const +{ + // Get 
XY offset values to get XY in window coordinates + int OFX = static_cast(m_context->XYOFFSET.OFX); + int OFY = static_cast(m_context->XYOFFSET.OFY); + + pxAssert((m_index.tail % 2) == 0); // should be a multiple of 2 for sprites + + // Initialize min/max UV values + minU = std::numeric_limits::max(); + minV = std::numeric_limits::max(); + maxU = std::numeric_limits::min(); + maxV = std::numeric_limits::min(); + + // Iterate through each sprite and get min/max UVs + for (size_t sprite_i = 0; sprite_i < m_index.tail / 2; sprite_i++) + { + Point verts[2]; + for (int vert_i = 0; vert_i < 2; vert_i++) + { + int xi = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].XYZ.X); + int yi = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].XYZ.Y); + Point p; + p.x = static_cast(xi - OFX) / 16.0; + p.y = static_cast(yi - OFY) / 16.0; + if (m_draw_env->PRIM.FST) + { + p.U = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].U) / 16.0; + p.V = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].V) / 16.0; + } + else + { + p.U = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].ST.S) * W; + p.T = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].ST.T) * H; + p.Q = static_cast(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].RGBAQ.Q); + } + verts[vert_i] = p; + } + + if (!m_draw_env->PRIM.FST) + { + // For sprites, always use the second Q value + verts[0].S = verts[0].S / verts[1].Q; + verts[0].T = verts[0].T / verts[1].Q; + verts[1].S = verts[1].S / verts[1].Q; + verts[1].T = verts[1].T / verts[1].Q; + } + + double x0 = std::min(verts[0].x, verts[1].x); + double y0 = std::min(verts[0].y, verts[1].y); + double x1 = std::max(verts[0].x, verts[1].x); + double y1 = std::max(verts[0].y, verts[1].y); + + double U0 = verts[0].x == x0 ? verts[0].U : verts[1].U; + double V0 = verts[0].y == y0 ? verts[0].V : verts[1].V; + double U1 = verts[0].x == x1 ? verts[0].U : verts[1].U; + double V1 = verts[0].y == y1 ? 
verts[0].V : verts[1].V; + + int xVals[2] = { static_cast(std::ceil(x0)), static_cast(std::floor(x1)) }; + int yVals[2] = { static_cast(std::ceil(y0)), static_cast(std::floor(y1)) }; + if (std::floor(x1) == x1) // omit right edges + xVals[1]--; + if (std::floor(y1) == y1) // omit bottom edges + yVals[1]--; + + // scissoring + int SCAX0 = static_cast(m_context->SCISSOR.SCAX0); + int SCAY0 = static_cast(m_context->SCISSOR.SCAY0); + int SCAX1 = static_cast(m_context->SCISSOR.SCAX1); + int SCAY1 = static_cast(m_context->SCISSOR.SCAY1); + for (int i = 0; i < 2; i++) + xVals[i] = std::max(SCAX0, std::min(xVals[i], SCAX1)); + for (int i = 0; i < 2; i++) + yVals[i] = std::max(SCAY0, std::min(yVals[i], SCAY1)); + + if (xVals[0] <= xVals[1] && yVals[0] <= yVals[1]) + { + for (double x : xVals) + { + const double U = ((x1 - x) * U0 + (x - x0) * U1) / (x1 - x0); + if (bilinear) + { + minU = std::min(static_cast(std::floor(U - 0.5)), minU); + maxU = std::max(static_cast(std::floor(U + 0.5)), maxU); + } + else + { + minU = std::min(static_cast(std::floor(U)), minU); + maxU = std::max(static_cast(std::floor(U)), maxU); + } + } + for (double y : yVals) + { + const double V = ((y1 - y) * V0 + (y - y0) * V1) / (y1 - y0); + if (bilinear) + { + minV = std::min(static_cast(std::floor(V - 0.5)), minV); + maxV = std::max(static_cast(std::floor(V + 0.5)), maxV); + } + else + { + minV = std::min(static_cast(std::floor(V)), minV); + maxV = std::max(static_cast(std::floor(V)), maxV); + } + } + } + } + + GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU); + GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV); +} + static __fi bool IsAutoFlushEnabled() { return GSIsHardwareRenderer() ? 
(GSConfig.UserHacks_AutoFlush != GSHWAutoFlushLevel::Disabled) : GSConfig.AutoFlushSW;
@@ -1675,6 +2439,7 @@ void GSState::FlushPrim()
 		}
 #endif
 
+		// TODO: Put the accurate UV calculation here?
 		m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
 
 		// Texel coordinate rounding
@@ -3716,16 +4481,117 @@ __forceinline void GSState::VertexKick(u32 skip)
 		Flush(VERTEXCOUNT);
 }
 
+// FIXME: Replace the old UsesRegionRepeat with this (note: references for outputs, inclusive max_out).
+// Maps the inclusive range [ min .. max ] under the region repeat function determined by MSK and FIX.
+// The region repeat function is f(x) = (x & MSK) | FIX; min_out/max_out receive the smallest/largest f(x).
+// Returns true if f(x) != x for at least one x in [ min .. max ].
+bool GSState::GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out)
+{
+	// If we cross from -1 to 0, handle the negative and non-negative parts separately and merge the results,
+	// as the algorithm below only works if min <= max when both are viewed as unsigned integers.
+	if (min < 0 && 0 <= max)
+	{
+		int min_out_1, max_out_1, min_out_2, max_out_2;
+		const bool modified_1 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, -1, min_out_1, max_out_1);
+		const bool modified_2 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, 0, max, min_out_2, max_out_2);
+		min_out = std::min(min_out_1, min_out_2);
+		max_out = std::max(max_out_1, max_out_2);
+		return modified_1 || modified_2;
+	}
+
+	const int cleared_bits = ~MSK & ~FIX; // Bits that are always cleared by applying msk and fix
+	const int set_bits = FIX; // Bits that are always set by applying msk and fix
+	unsigned long msb; // Written by _BitScanReverse (declared outside this hunk); used below as a bit index
+	int variable_bits = min ^ max;
+	if (_BitScanReverse(&msb, variable_bits))
+		variable_bits |= (1 << msb) - 1; // Fill in all lower bits
+
+	const int always_set = min & ~variable_bits; // Bits that are set in every value in min...max
+	const int sometimes_set = min | variable_bits; // Bits that are set in at least one value in min...max
+
+	const bool sets_bits = (set_bits | always_set) != always_set; // At least one bit in min...max is set by applying msk and fix
+	const bool clears_bits = (cleared_bits & sometimes_set) != 0; // At least one bit in min...max is cleared by applying msk and fix
+
+	const int overwritten_variable_bits = (cleared_bits | set_bits) & variable_bits;
+	// A variable bit that's `0` in `min` will at some point switch to a `1` (because it's variable)
+	// When it does, all bits below it will switch to a `0` (that's how incrementing works)
+	// If the 0 to 1 switch is reflected in the final output (not masked and not replaced by a fixed value),
+	// the final value would be larger than the previous. Otherwise, the final value will be less.
+	// The true minimum value is `min` with all bits below the most significant replaced variable `0` bit cleared
+	const int min_overwritten_variable_zeros = ~min & overwritten_variable_bits;
+	if (_BitScanReverse(&msb, min_overwritten_variable_zeros))
+		min &= (~0u << msb); // Clear every bit below msb
+	// Same idea for max, but using the most significant overwritten variable `1` bit
+	const int max_overwritten_variable_ones = max & overwritten_variable_bits;
+	if (_BitScanReverse(&msb, max_overwritten_variable_ones))
+		max |= (1 << msb) - 1; // Set every bit below msb
+
+	min_out = (MSK & min) | FIX;
+	max_out = (MSK & max) | FIX;
+
+	return sets_bits || clears_bits;
+}
+
+// Get the min/max texel coordinate (U or V) assuming it takes the values min .. max and is then
+// wrapped/clamped according to the mode WM.
+// SIZE: Width/height of texture (power of 2)
+// MIN/MAX: Either the clamping range (in REGION_CLAMP mode) or the MSK/FIX parameters (in REGION_REPEAT mode)
+// Returns true if any of the values are changed. I.e., if f(x) is the mapping function for clamp/wrap mode,
+// return true if f(x) != x for some x in [ min ..
max ]
+bool GSState::GetClampWrapMinMaxUV(int SIZE, int WM, int MIN, int MAX, int min, int max, int& min_out, int& max_out)
+{
+	const int MSK = MIN; // In REGION_REPEAT mode the MIN/MAX arguments carry the MSK/FIX parameters
+	const int FIX = MAX;
+
+	if (WM == CLAMP_REPEAT)
+	{
+		// If we cross the SIZE boundary then we always get the largest/smallest possible wrapped value
+		if ((min & ~(SIZE - 1)) != (max & ~(SIZE - 1)))
+		{
+			min_out = 0;
+			max_out = SIZE - 1;
+		}
+		else
+		{
+			min_out = min & (SIZE - 1);
+			max_out = max & (SIZE - 1);
+		}
+		return min < 0 || max > SIZE - 1; // Wrapping changes a value iff it lies outside [ 0 .. SIZE - 1 ]
+	}
+	else if (WM == CLAMP_CLAMP)
+	{
+		min_out = std::max(0, std::min(SIZE - 1, min));
+		max_out = std::max(0, std::min(SIZE - 1, max));
+		return min < 0 || max > SIZE - 1; // Clamping changes a value iff it lies outside [ 0 .. SIZE - 1 ]
+	}
+	else if (WM == CLAMP_REGION_CLAMP)
+	{
+		min_out = std::max(MIN, std::min(MAX, min));
+		max_out = std::max(MIN, std::min(MAX, max));
+		return min < MIN || max > MAX; // Clamping changes a value iff it lies outside [ MIN .. MAX ]
+	}
+	else if (WM == CLAMP_REGION_REPEAT)
+	{
+		return GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, max, min_out, max_out);
+	}
+	else
+	{
+		pxAssertMsg(false, "Invalid clamp/wrap mode"); // NOTE: min_out/max_out are left unwritten on this path
+		return false;
+	}
+}
+
 /// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
 /// Also calculates the real min and max values seen after applying the region repeat to all values in min...max
-static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
+/// FIXME: CHANGE MEMBER NAMES AND MAKE SURE CALLS HAVE ARGS IN THE RIGHT ORDER!!!
+bool GSState::UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out) { if ((min < 0) != (max < 0)) { // Algorithm doesn't work properly if bits overflow when incrementing (happens on the -1 → 0 crossing) // Conveniently, crossing zero guarantees you use the full range *min_out = fix; - *max_out = (fix | msk) + 1; + *max_out = fix | msk; return true; } @@ -3757,7 +4623,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i max |= (1 << msb) - 1; *min_out = (msk & min) | fix; - *max_out = ((msk & max) | fix) + 1; + *max_out = (msk & max) | fix; return sets_bits || clears_bits; } diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 94f9a5442b..773566c78f 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -22,6 +22,9 @@ public: virtual ~GSState(); static constexpr int GetSaveStateSize(); + static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out); + static bool GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out); + static bool GetClampWrapMinMaxUV(int SIZE, int WM, int MSK, int FIX, int min, int max, int& min_out, int& max_out); private: // RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there) @@ -361,6 +364,76 @@ public: void CalculateDisplayOffset(bool scanmask); } PCRTCDisplays; +public: + struct Point + { + double x, y; + union + { + double U; + double S; + }; + union + { + double T; + double V; + }; + double Q; + + Point(double x = 0, double y = 0, double US = NAN, double VT = NAN, double Q = NAN) + : x(x) + , y(y) + , U(US) + , V(VT) + , Q(Q) + { + } + }; + + struct EdgeFunction + { + double a, b, c; + + EdgeFunction(double a, double b, double c) + : a(a) + , b(b) + , c(c) + { + } + }; + + enum class EdgeType + { + TOP, + RIGHT, + LEFT, + BOTTOM + }; + +public: + static EdgeFunction 
GetEdgeFunction(const Point& a, const Point& b); + static bool CheckEdgeFunction(double e, EdgeType edgeType); + static std::tuple InterpolateEdgeFunctionsUV( + double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2); + static std::tuple InterpolateEdgeFunctionsSTQ( + double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2); + static bool CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger); + static std::tuple SortPoints(Point p0, Point p1, Point p2); + static __forceinline bool CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1); + static std::tuple SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2); + static std::tuple CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp); + static void CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2, + const Point& p0, const Point& p1, const Point& p2, + int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV); + static void CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear, + const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV); + static void EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear, + const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV); + static void EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp, + int& minU, int& minV, int& maxU, int& maxV); + void GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const; + void GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, 
int& minV, int& maxU, int& maxV) const; + public: /// Returns the appropriate directory for draw dumping. static std::string GetDrawDumpPath(const char* format, ...); diff --git a/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp b/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp index 4b9e5e5801..f7ebd4b60b 100644 --- a/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp +++ b/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp @@ -183,6 +183,14 @@ void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u for (; i < (count - 1); i += 2) // 2x loop unroll { processVertices(v[index[i + 0]], v[index[i + 1]], true); + if (GSState::s_n == 58) + { + fprintf(stderr, "%d: %f %f %f %f\n", i, tmin.x, tmin.y, tmax.x, tmax.y); + } + if (GSState::s_n == 58 && i == 164) + { + fprintf(stderr, "hi\n"); + } } if (count & 1) { diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp index e6fa605090..c1b1b1d380 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp +++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp @@ -10,8 +10,21 @@ #include +#include "debug.h" +#if MY_DEBUG == 1 +#include + +extern bool savePoints; +extern std::map> pointsHackRange; +extern std::map> pointsSWRange; +extern std::vector> pointsHackDebug; +extern std::vector> pointsSWDebug; +extern int s_n_debug; +extern int* primIDSW; +#else // Comment to disable all dynamic code generation. 
#define ENABLE_JIT_RASTERIZER +#endif #if MULTI_ISA_COMPILE_ONCE // Lack of a better home @@ -627,8 +640,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } } +#if MY_DEBUG == 1 + int x = left - 4; +#endif while (1) { +#if MY_DEBUG == 1 + x += 4; +#endif + if (global.s_n == 58 && top == 188 && (*primIDSW - 1 == 1094)) + { + printf(""); + } do { int fa = 0, za = 0; @@ -712,6 +735,33 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV case ZTST_GREATER: test |= zso <= zdo; break; } +#if MY_DEBUG == 1 + if (savePoints) + { + for (int i = 0; i < 4; i++) + { + if (!pointsSWRange.contains(global.s_n)) + { + pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000}; + } + if (global.s_n == s_n_debug) + { + if (sel.ltf) + { + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1}); + } + else + { + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1}); + } + } + } + } +#endif + if (test.alltrue()) continue; } @@ -1119,6 +1169,48 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV uv1 = clamp.blend8(repeat, VectorI::broadcast128(global.t.mask)); } +#if MY_DEBUG == 1 + if (savePoints) + { + for (int i = 0; i < 4; i++) + { + if (test.U32[i] == 0 || sel.notest) + { + if (!pointsSWRange.contains(global.s_n)) + { + pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000}; + } + std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv0.U16[i]); + std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]); + std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv0.U16[i]); + std::get<3>(pointsSWRange[global.s_n]) = 
std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]); + if (sel.ltf) + { + std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv1.U16[i]); + std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]); + std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv1.U16[i]); + std::get<3>(pointsSWRange[global.s_n]) = std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]); + } + if (global.s_n == s_n_debug) + { + if (sel.ltf) + { + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv1.U16[i + 4]}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv0.U16[i + 4]}); + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv1.U16[i + 4]}); + } + else + { + pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]}); + } + } + + } + } + } +#endif + VectorI y0 = uv0.uph16() << (sel.tw + 3); VectorI x0 = uv0.upl16(); diff --git a/pcsx2/GS/Renderers/SW/GSRasterizer.cpp b/pcsx2/GS/Renderers/SW/GSRasterizer.cpp index dda8409316..c045beed1f 100644 --- a/pcsx2/GS/Renderers/SW/GSRasterizer.cpp +++ b/pcsx2/GS/Renderers/SW/GSRasterizer.cpp @@ -19,6 +19,21 @@ MULTI_ISA_UNSHARED_IMPL; int GSRasterizerData::s_counter = 0; +#include "debug.h" +#if MY_DEBUG == 1 +extern bool savePoints; +extern int s_n_debug; +extern int s_n_exit; +extern int primID; +extern int* primIDSW; +extern std::map> pointsHackRange; +extern std::map> pointsSWRange; +extern std::vector> pointsHackDebug; +extern std::vector> pointsSWDebug; +extern std::map, std::tuple> pointsHackDebugOrig; +extern std::map, std::tuple> pointsSWDebugOrig; +#endif + static int compute_best_thread_height(int threads) { // - for more threads screen segments should be smaller to 
better distribute the pixels @@ -56,6 +71,10 @@ GSRasterizer::GSRasterizer(GSDrawScanline* ds, int id, int threads) { m_scanline[i] = (i % threads) == id ? 1 : 0; } + +#if MY_DEBUG == 1 + primIDSW = &m_primcount; +#endif } GSRasterizer::~GSRasterizer() @@ -597,6 +616,19 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index) { +#if MY_DEBUG == 1 + if (GSState::s_n == s_n_debug) + { + // FIXME; WHAT IS SCALING FOR UV? + double scaleTX = 1 / (double)(1 << m_local.gd->TW) / 256.0 / 256.0; + double scaleTY = 1 / (double)(1 << m_local.gd->TH) / 256.0 / 256.0; + pointsSWDebugOrig[{m_primcount, 0}] = {vertex[index[0]].p.x, vertex[index[0]].p.y, vertex[index[0]].t.x * scaleTX, vertex[index[0]].t.y * scaleTY}; + pointsSWDebugOrig[{m_primcount, 1}] = {vertex[index[1]].p.x, vertex[index[1]].p.y, vertex[index[1]].t.x * scaleTX, vertex[index[1]].t.y * scaleTY}; + pointsSWDebugOrig[{m_primcount, 2}] = {vertex[index[2]].p.x, vertex[index[2]].p.y, vertex[index[2]].t.x * scaleTX, vertex[index[2]].t.y * scaleTY}; + } +#endif + + m_primcount++; GSVertexSW edge; diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index 01662d605c..f09b6038be 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -10,8 +10,25 @@ MULTI_ISA_UNSHARED_IMPL; +#define USE_HACK 1 + +#include "debug.h" + +#if MY_DEBUG == 1 +extern bool savePoints; +extern int s_n_debug; +extern int s_n_exit; +extern std::map> pointsHackRange; +extern std::map> pointsSWRange; +extern void dumpRanges(); +#endif + GSRenderer* CURRENT_ISA::makeGSRendererSW(int threads) { +#if MY_DEBUG == 1 + if (savePoints) + threads = 0; +#endif return new GSRendererSW(threads); } @@ -431,8 +448,6 @@ void GSRendererSW::Draw() sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm()); - // - if (GSConfig.DumpGSData) { Sync(2); @@ 
-556,6 +571,11 @@ void GSRendererSW::Queue(GSRingHeap::SharedPtr& item) fflush(s_fp); } +#if MY_DEBUG == 1 + sd->global.s_n = s_n; + sd->global.TW = m_context->TEX0.TW; + sd->global.TH = m_context->TEX0.TH; +#endif m_rl->Queue(item); // invalidate new parts rendered onto @@ -1057,7 +1077,54 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); - GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + GSVector4i r; +#if MY_DEBUG == 1 + fprintf(stderr, "%d\n", s_n); +#endif + if (m_vt.m_primclass == GS_TRIANGLE_CLASS && USE_HACK) + { + int minU = std::numeric_limits::max(); + int minV = std::numeric_limits::max(); + int maxU = std::numeric_limits::min(); + int maxV = std::numeric_limits::min(); + //GetTriangleMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV); + GetTriangleMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV); + + r = GSVector4i(minU, minV, maxU + 1, maxV + 1); +#if MY_DEBUG == 1 + if (s_n == 58) + { + printf(""); + } + GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + if (savePoints) + { + pointsHackRange[s_n] = {minU, minV, maxU, maxV}; + } +#endif + } + else if (m_vt.m_primclass == GS_SPRITE_CLASS && USE_HACK) + { + int minU = std::numeric_limits::max(); + int minV = std::numeric_limits::max(); + int maxU = std::numeric_limits::min(); + int maxV = std::numeric_limits::min(); + //GetSpriteMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV); + GetSpriteMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV); + + r = GSVector4i(minU, minV, maxU + 1, maxV + 1); +#if MY_DEBUG == 1 + GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + if (savePoints) + { + pointsHackRange[s_n] = {minU, minV, maxU, maxV}; + } +#endif + } + else 
+ { + r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + } GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); diff --git a/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h b/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h index 9ae55a2ab9..a823187e2d 100644 --- a/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h +++ b/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h @@ -9,6 +9,8 @@ #include #include +#include "debug.h" + union GSScanlineSelector { struct @@ -117,6 +119,12 @@ union GSScanlineSelector struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a pixel shader constant buffer { + // TODO: REMOVE DEBUG CODE +#if MY_DEBUG == 1 + int s_n = -1; + int TW = -1, TH = -1; +#endif + GSScanlineSelector sel; // - the data of vm, tex may change, multi-threaded drawing must be finished before that happens, clut and dimx are copies diff --git a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp index 260e9930e9..5cce41cd46 100644 --- a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp @@ -215,7 +215,8 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) GSVector4i r = rect; - r = r.ralign(bs); + // FIXME: Is this an ok rintersect hack to make sure we don't read outside the texture? 
+ r = r.ralign(bs).rintersect(GSVector4i(0, 0, tw, th)); if (r.eq(GSVector4i(0, 0, tw, th))) { diff --git a/pcsx2/debug.h b/pcsx2/debug.h new file mode 100644 index 0000000000..7c5a473b65 --- /dev/null +++ b/pcsx2/debug.h @@ -0,0 +1,2 @@ +#pragma once +#define MY_DEBUG 1 \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 1d2efe083b..41e17c4f60 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -592,6 +592,7 @@ + @@ -1025,4 +1026,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index e72297d4b7..75f1a11dc0 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -220,15 +220,6 @@ {eb697f5b-85f5-424a-a7e4-8d8b73d3426e} - - {9153e32b-e1e3-49ac-b490-b56adfd1692f} - - - {03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938} - - - {78c9db9c-9c7c-4385-90e7-9fa71b922f60} - {e068b724-9319-42e5-9ea7-63d80989ea1d} @@ -283,12 +274,21 @@ {8aea3ae6-9722-463a-94ac-34f3738a3153} - - {cf847f4e-744e-4c27-a7ac-8564726fb4e6} - {cd8ec519-2196-43f7-86de-7faced2d4296} + + {9153e32b-e1e3-49ac-b490-b56adfd1692f} + + + {03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938} + + + {78c9db9c-9c7c-4385-90e7-9fa71b922f60} + + + {cf847f4e-744e-4c27-a7ac-8564726fb4e6} + @@ -1119,19 +1119,19 @@ System\Ps2\GS\Renderers\Direct3D12 - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording\Utilities + System\Tools\Input Recording\Utilities System\Ps2\USB @@ -1233,7 +1233,7 @@ System\Ps2\SPU2 - Tools + System\Tools System\Ps2\GS\Renderers\Direct3D11 @@ -1302,7 +1302,7 @@ Misc - Tools + System\Tools Misc\Input @@ -1329,7 +1329,7 @@ Misc - Tools + System\Tools Misc @@ -1432,7 +1432,7 @@ System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64 - Tools\arm64 + System\Tools\arm64 System\Ps2\EmotionEngine\EE\Dynarec\arm64 @@ 
-2059,19 +2059,19 @@ System\Ps2\GS\Renderers\Direct3D12 - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording + System\Tools\Input Recording - Tools\Input Recording\Utilities + System\Tools\Input Recording\Utilities System\Include @@ -2191,7 +2191,7 @@ System\Ps2\IPU - Tools + System\Tools System\Ps2\GS\GIF @@ -2391,7 +2391,7 @@ System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64 - Tools\arm64 + System\Tools\arm64 System\Ps2\Iop\SIO\PAD @@ -2399,6 +2399,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\GS + @@ -2428,4 +2431,4 @@ System\Ps2\GS - + \ No newline at end of file