diff --git a/common/vsprops/common.props b/common/vsprops/common.props
index d97da47b8d..9b0a030d19 100644
--- a/common/vsprops/common.props
+++ b/common/vsprops/common.props
@@ -52,10 +52,10 @@
_M_X86;__SSE4_1__;%(PreprocessorDefinitions)
- NotSet
- AdvancedVectorExtensions2
+ NotSet
+ AdvancedVectorExtensions2
+
-march=nehalem %(AdditionalOptions)
- -march=haswell %(AdditionalOptions)
-march=armv8.4-a %(AdditionalOptions)
%(AdditionalOptions) /Zc:externConstexpr /Zc:__cplusplus /Zo /utf-8
diff --git a/pcsx2-gsrunner/Main.cpp b/pcsx2-gsrunner/Main.cpp
index d3ad1e8182..7b9dc3b887 100644
--- a/pcsx2-gsrunner/Main.cpp
+++ b/pcsx2-gsrunner/Main.cpp
@@ -45,6 +45,12 @@
#include "pcsx2/VMManager.h"
#include "svnrev.h"
+#include "debug.h"
+#if MY_DEBUG == 1
+
+#include
+extern bool savePoints;
+#endif
namespace GSRunner
{
@@ -141,6 +147,20 @@ bool GSRunner::InitializeConfig()
si.SetStringValue("MemoryCards", fmt::format("Slot{}_Filename", i + 1).c_str(), "");
}
+#if MY_DEBUG == 1
+ if (false)
+ {
+ si.SetBoolValue("EmuCore/GS", "dump", true);
+ si.SetIntValue("EmuCore/GS", "saven", 0);
+ si.SetIntValue("EmuCore/GS", "savel", 100);
+ si.SetBoolValue("EmuCore/GS", "save", true);
+ si.SetBoolValue("EmuCore/GS", "savef", true);
+ si.SetBoolValue("EmuCore/GS", "savet", true);
+ si.SetBoolValue("EmuCore/GS", "savez", true);
+ si.SetStringValue("EmuCore/GS", "HWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug");
+ si.SetStringValue("EmuCore/GS", "SWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug");
+ }
+#endif
VMManager::Internal::LoadStartupSettings();
return true;
}
@@ -857,8 +877,16 @@ LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
return DefWindowProcW(hwnd, msg, wParam, lParam);
}
+#if MY_DEBUG == 1
+extern void dumpRanges();
+#endif
+
int wmain(int argc, wchar_t** argv)
{
+#if MY_DEBUG == 1
+ if (savePoints)
+ atexit(dumpRanges);
+#endif
std::vector u8_args;
u8_args.reserve(static_cast(argc));
for (int i = 0; i < argc; i++)
diff --git a/pcsx2/GS/GSDrawingContext.cpp b/pcsx2/GS/GSDrawingContext.cpp
index 71ca866682..5edd692d84 100644
--- a/pcsx2/GS/GSDrawingContext.cpp
+++ b/pcsx2/GS/GSDrawingContext.cpp
@@ -5,7 +5,11 @@
#include "GS/GSGL.h"
#include "GS/GS.h"
#include "GS/GSUtil.h"
+#include "GS/GSState.h"
+
+// FIXME: Rename this function and give its arguments clearer names.
+// Also check that every call site passes the arguments in the right order.
static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
{
// return max possible texcoord.
@@ -38,10 +42,8 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
{
// REGION_REPEAT adhears to the original texture size, even if offset outside the texture (with MAXUV).
minuv &= limit;
- if (tl < 0)
- uv = minuv | maxuv; // wrap around, just use (any & mask) | fix.
- else
- uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask).
+ int ignore;
+ GSState::UsesRegionRepeat(maxuv, minuv, tl, br, &ignore, &uv);
}
return uv;
@@ -130,18 +132,18 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
if (tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less
{
- tw = reduce(uv.x, tw);
- th = reduce(uv.y, th);
+ tw = reduce(uv.x + 1, tw);
+ th = reduce(uv.y + 1, th);
}
if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
{
- tw = extend(uv.x, tw);
+ tw = extend(uv.x + 1, tw);
}
if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
{
- th = extend(uv.y, th);
+ th = extend(uv.y + 1, th);
}
GIFRegTEX0 res = TEX0;
diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp
index 83068f15db..f90a0362a3 100644
--- a/pcsx2/GS/GSState.cpp
+++ b/pcsx2/GS/GSState.cpp
@@ -18,10 +18,774 @@
#include
#include
+#include "debug.h"
+
+#define WALK_FULL_EDGE 1 // 1: EdgeWalkTriangleMinMaxUVImpl keeps walking an edge after a hit; 0: it stops at the first covered point inside the scissor
+
int GSState::s_n = 0;
int GSState::s_last_transfer_draw_n = 0;
int GSState::s_transfer_n = 0;
+#if MY_DEBUG == 1
+bool savePoints = false; // debug switch; not written anywhere in the visible part of this file
+int s_n_debug = -1; // draw number (compared against GSState::s_n) whose per-point data is recorded
+int s_n_exit = -1; // not referenced in the visible part of this file
+int primID = 0; // index of the triangle currently processed by GetTriangleMinMaxUV
+int* primIDSW = 0; // not referenced in the visible part of this file
+std::map> pointsHackRange; // dumped by dumpRanges as "key,x,y,u,v"
+std::map> pointsSWRange; // compared key by key against pointsHackRange in dumpRanges
+std::vector> pointsHackDebug; // (prim, x, y, u, v) entries appended by CalculateUV
+std::vector> pointsSWDebug; // same layout as pointsHackDebug; not filled in the visible part of this file
+std::map, std::tuple> pointsHackDebugOrig; // (triangle index, vertex index) -> four values stored by GetTriangleMinMaxUV
+std::map, std::tuple> pointsSWDebugOrig; // same layout as pointsHackDebugOrig; not filled in the visible part of this file
+
+void sortPoints(std::vector>& v) // Sorts the recorded points in place, ascending by (prim, y, x, u, v).
+{
+ std::sort(v.begin(), v.end(), [](const std::tuple& a, const std::tuple& b) {
+ int ai[5] = {std::get<0>(a), std::get<2>(a), std::get<1>(a), std::get<3>(a), std::get<4>(a)}; // prim, y, x, u, v (elements 1 and 2 swapped so y is compared before x)
+ int bi[5] = {std::get<0>(b), std::get<2>(b), std::get<1>(b), std::get<3>(b), std::get<4>(b)}; // prim, y, x, u, v
+ for (int i = 0; i < 5; i++) // lexicographic compare: the first differing key decides
+ {
+ if (ai[i] < bi[i])
+ return true;
+ if (ai[i] > bi[i])
+ return false;
+ }
+ return false; // all five keys equal: neither element orders before the other
+ });
+}
+
+// Debug-only helper (compiled when MY_DEBUG == 1): writes the recorded UV ranges, the
+// per-point data and the original vertex data of the hack path and of the software path
+// to text files, then compares the two sets of per-key ranges.
+//
+// Output goes to a hard-coded directory. A file that cannot be opened is skipped instead of
+// handing a null FILE* to fprintf/fclose. Both point lists are sorted before they are
+// written, so the hack and software dumps can be diffed line by line. Range mismatches are
+// reported on stderr.
+void dumpRanges()
+{
+	// Opens "<dir><stem>.txt", or "<dir><stem>_<s_n_debug>.txt" when per_draw is set.
+	const auto open_log = [](const char* stem, bool per_draw) -> FILE* {
+		char path[1024];
+		if (per_draw)
+			snprintf(path, sizeof(path), "C:\\Users\\tchan\\Desktop\\log_files\\%s_%d.txt", stem, s_n_debug);
+		else
+			snprintf(path, sizeof(path), "C:\\Users\\tchan\\Desktop\\log_files\\%s.txt", stem);
+		return fopen(path, "w");
+	};
+
+	// One "key,x,y,u,v" line per map entry.
+	const auto write_ranges = [&open_log](const char* stem, const auto& ranges) {
+		FILE* file = open_log(stem, false);
+		if (!file)
+			return;
+		for (const auto& [key, range] : ranges)
+		{
+			const auto& [x, y, u, v] = range;
+			fprintf(file, "%d,%d,%d,%d,%d\n", key, x, y, u, v);
+		}
+		fclose(file);
+	};
+
+	// One "prim,x,y,u,v" line per recorded point; the list is sorted first.
+	const auto write_points = [&open_log](const char* stem, auto& points) {
+		sortPoints(points);
+		FILE* file = open_log(stem, true);
+		if (!file)
+			return;
+		for (const auto& [n, x, y, u, v] : points)
+			fprintf(file, "%d,%d,%d,%d,%d\n", n, x, y, u, v);
+		fclose(file);
+	};
+
+	// One "prim,vertex,x,y,u,v <tag>" line per original vertex.
+	const auto write_verts = [&open_log](const char* stem, const auto& verts, const char* tag) {
+		FILE* file = open_log(stem, true);
+		if (!file)
+			return;
+		for (const auto& [key, value] : verts)
+		{
+			const auto& [prim_id, vert_id] = key;
+			const auto& [x, y, u, v] = value;
+			fprintf(file, "%d,%d,%f,%f,%f,%f %s\n", prim_id, vert_id, x, y, u, v, tag);
+		}
+		fclose(file);
+	};
+
+	write_ranges("pointsHackRange", pointsHackRange);
+	write_points("pointsHackDebug", pointsHackDebug);
+	write_verts("pointsHackDebugOrig", pointsHackDebugOrig, "Hack");
+
+	write_ranges("pointsSWRange", pointsSWRange);
+	write_points("pointsSWDebug", pointsSWDebug);
+	write_verts("pointsSWDebugOrig", pointsSWDebugOrig, "SW");
+
+	// Compare the ranges of every key present in both maps.
+	for (const auto& [key, hack_range] : pointsHackRange)
+	{
+		const auto sw_it = pointsSWRange.find(key);
+		if (sw_it == pointsSWRange.end())
+			continue;
+
+		const auto& [x1, y1, u1, v1] = hack_range;
+		const auto& [x2, y2, u2, v2] = sw_it->second;
+		if (x1 != x2 || y1 != y2 || u1 != u2 || v1 != v2)
+			fprintf(stderr, "Wrong range: %d; %d %d %d %d; %d %d %d %d;\n", key, x1, y1, u1, v1, x2, y2, u2, v2);
+	}
+}
+#endif
+
+// Builds the coefficients {a, b, c} of the edge function E(x, y) = a*x + b*y + c for the
+// directed edge a -> b. E evaluates to zero at both end points.
+__forceinline GSState::EdgeFunction GSState::GetEdgeFunction(const Point& a, const Point& b)
+{
+	const auto coef_x = a.y - b.y;
+	const auto coef_y = b.x - a.x;
+	const auto constant = a.x * b.y - a.y * b.x;
+	return {coef_x, coef_y, constant};
+}
+
+// Tests an edge-function value against the fill rule of its edge type:
+//   TOP / LEFT     - points exactly on the edge count as covered (e >= 0)
+//   RIGHT / BOTTOM - only strictly interior points count (e > 0)
+// Any other edge type trips pxFail and is reported as "not covered", so the function never
+// runs off its end without returning a value.
+__forceinline bool GSState::CheckEdgeFunction(double e, EdgeType edgeType)
+{
+	if (edgeType == EdgeType::TOP || edgeType == EdgeType::LEFT)
+		return e >= 0;
+
+	if (edgeType == EdgeType::RIGHT || edgeType == EdgeType::BOTTOM)
+		return e > 0;
+
+	pxFail("Invalid edge type");
+	return false;
+}
+
+__forceinline std::tuple GSState::InterpolateEdgeFunctionsUV( // Interpolates U and V from the three edge-function values of a point.
+ double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2) // weights: e12 -> p0, e20 -> p1, e01 -> p2
+{
+ return {(e12 * p0.U + e20 * p1.U + e01 * p2.U) / (e01 + e12 + e20), (e12 * p0.V + e20 * p1.V + e01 * p2.V) / (e01 + e12 + e20)}; // normalised by e01 + e12 + e20; a zero sum is not guarded against
+}
+
+__forceinline std::tuple GSState::InterpolateEdgeFunctionsSTQ( // Interpolates S, T and Q from the three edge-function values of a point.
+ double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2) // weights: e12 -> p0, e20 -> p1, e01 -> p2
+{
+ double S = (e12 * p0.S + e20 * p1.S + e01 * p2.S) / (e01 + e12 + e20); // normalised by e01 + e12 + e20; a zero sum is not guarded against
+ double T = (e12 * p0.T + e20 * p1.T + e01 * p2.T) / (e01 + e12 + e20);
+ double Q = (e12 * p0.Q + e20 * p1.Q + e01 * p2.Q) / (e01 + e12 + e20);
+ return {S, T, Q}; // S and T are returned undivided; the caller performs the division by Q
+}
+
+// Reports whether (x, y) is still within the inclusive bounding box [minX, maxX] x [minY, maxY].
+// Only the x range is tested when dxBigger is set, only the y range when dyBigger is set,
+// and either range suffices when neither flag is set.
+__forceinline bool GSState::CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger)
+{
+	const bool x_in_range = minX <= x && x <= maxX;
+	const bool y_in_range = minY <= y && y <= maxY;
+
+	if (dxBigger)
+		return x_in_range;
+	if (dyBigger)
+		return y_in_range;
+	return x_in_range || y_in_range;
+}
+
+// Reorders a triangle's vertices: p1 and p2 are swapped when the cross product (p1 - p0) x (p2 - p0) is negative,
+// which is meant to keep the interior on the right of each directed edge; the triple is then rotated so the vertex with the smallest y (ties: smallest x) comes first.
+__forceinline std::tuple GSState::SortPoints(Point p0, Point p1, Point p2)
+{
+ if ((p1.x - p0.x) * (p2.y - p0.y) - (p1.y - p0.y) * (p2.x - p0.x) < 0) // negative cross product: flip the winding
+ {
+ std::swap(p1, p2);
+ }
+
+ Point p[3] = {p0, p1, p2};
+ int i0 = 0; // index of the best starting vertex found so far
+ for (int i = 1; i < 3; ++i)
+ {
+ if (p[i].y < p[i0].y || (p[i].y == p[i0].y && p[i].x < p[i0].x))
+ {
+ i0 = i;
+ }
+ }
+ return {p[i0], p[(i0 + 1) % 3], p[(i0 + 2) % 3]}; // rotation only: the cyclic order of the vertices is unchanged
+}
+
+// Sorts the vertices with SortPoints and classifies the edges: p0-p1 is TOP or RIGHT, p1-p2 is RIGHT, LEFT or BOTTOM, and p2-p0 is always LEFT.
+__forceinline std::tuple
+ GSState::SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2)
+{
+ std::tie(p0, p1, p2) = SortPoints(p0, p1, p2);
+ EdgeType t01 = (p0.y == p1.y) ? EdgeType::TOP : EdgeType::RIGHT; // equal y on the first edge -> TOP
+ EdgeType t12 = (p1.y < p2.y) ? EdgeType::RIGHT : (p1.y > p2.y ? EdgeType::LEFT : EdgeType::BOTTOM); // p1.y < p2.y -> RIGHT, p1.y > p2.y -> LEFT, equal -> BOTTOM
+ EdgeType t20 = EdgeType::LEFT;
+ return {p0, p1, p2, t01, t12, t20}; // sorted vertices followed by the types of edges 0-1, 1-2 and 2-0
+}
+
+// Returns true when the pixel (x, y) lies inside the scissor rectangle.
+// All four bounds are inclusive.
+__forceinline bool GSState::CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1)
+{
+	if (x < SCAX0 || x > SCAX1)
+		return false;
+	return y >= SCAY0 && y <= SCAY1;
+}
+
+// Calculate the final UV coordinates of a rasterized point by applying the wrap/clamp mode of each axis.
+// Returns the resulting {U, V}; it does not touch any min/max accumulator.
+// TODO: We can remove this function. Not used.
+std::tuple GSState::CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp)
+{
+ // Initial clamping done on all UVs
+ U = std::max(-2047, std::min(2047, U));
+ V = std::max(-2047, std::min(2047, V));
+
+ // Clamping/wrapping for U
+ const int MINU = static_cast(clamp.MINU);
+ const int MAXU = static_cast(clamp.MAXU);
+ const int MSKU = MINU, FIXU = MAXU; // in region-repeat mode the MINU/MAXU fields are used as mask and fixed bits
+ switch (clamp.WMS) // U clamping/wrapping mode
+ {
+ case CLAMP_REPEAT:
+ U &= W - 1; // W is a power of 2
+ break;
+ case CLAMP_CLAMP:
+ U = std::max(0, std::min(W - 1, U)); // clamp to [0, W - 1]
+ break;
+ case CLAMP_REGION_CLAMP:
+ U = std::max(MINU, std::min(MAXU, U)); // clamp to [MINU, MAXU]
+ break;
+ case CLAMP_REGION_REPEAT:
+ U = (U & MSKU) | FIXU;
+ break;
+ } // no default case here, unlike the V switch below
+
+ // Clamping/wrapping for V
+ int MINV = static_cast(clamp.MINV);
+ int MAXV = static_cast(clamp.MAXV);
+ int MSKV = MINV, FIXV = MAXV; // in region-repeat mode the MINV/MAXV fields are used as mask and fixed bits
+ switch (clamp.WMT) // V clamping/wrapping mode
+ {
+ case CLAMP_REPEAT:
+ V &= H - 1; // H is a power of 2
+ break;
+ case CLAMP_CLAMP:
+ V = std::max(0, std::min(H - 1, V)); // clamp to [0, H - 1]
+ break;
+ case CLAMP_REGION_CLAMP:
+ V = std::max(MINV, std::min(MAXV, V)); // clamp to [MINV, MAXV]
+ break;
+ case CLAMP_REGION_REPEAT:
+ V = (V & MSKV) | FIXV;
+ break;
+ default:
+ ASSUME(0);
+ }
+
+ return {U, V};
+}
+
+#if MY_DEBUG == 1
+bool g_switchOrient = false; // set by EdgeWalkTriangleMinMaxUVImpl; CalculateUV negates the logged y while it is true
+#endif
+
+void GSState::CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2, // Widens minU/minV/maxU/maxV by the texel(s) sampled at one point; t0, t1, t2 are not used in the body
+ const Point& p0, const Point& p1, const Point& p2,
+ int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV) // clamp is not used in the body
+{
+ double U, V;
+ if (FST)
+ {
+ std::tie(U, V) = InterpolateEdgeFunctionsUV(e01, e12, e20, p0, p1, p2); // U/V are interpolated directly
+ }
+ else
+ {
+ auto [S, T, Q] = InterpolateEdgeFunctionsSTQ(e01, e12, e20, p0, p1, p2);
+ U = W * (S / Q); // division by the interpolated Q, scaled by the texture width
+ V = H * (T / Q); // same for the texture height
+ }
+
+ // Clamp to valid UV range
+ U = std::max(-2047.0, std::min(2047.0, U));
+ V = std::max(-2047.0, std::min(2047.0, V));
+
+ if (bilinear)
+ {
+#if MY_DEBUG == 1
+ if (GSState::s_n == s_n_debug) // record points only for the selected draw
+ {
+ int x = (int)((e01 * p2.x + e12 * p0.x + e20 * p1.x) / (e01 + e12 + e20)); // position recovered with the same weights as the UV interpolation
+ int y = (int)((e01 * p2.y + e12 * p0.y + e20 * p1.y) / (e01 + e12 + e20));
+ if (g_switchOrient)
+ y = -y; // undo the vertical flip applied by the reversed edge walk
+ pointsHackDebug.push_back({primID, x, y, (int)std::floor(U - 0.5), (int)std::floor(V - 0.5)}); // the four (U -/+ 0.5, V -/+ 0.5) combinations are logged
+ pointsHackDebug.push_back({primID, x, y, (int)std::floor(U - 0.5), (int)std::floor(V + 0.5)});
+ pointsHackDebug.push_back({primID, x, y, (int)std::floor(U + 0.5), (int)std::floor(V - 0.5)});
+ pointsHackDebug.push_back({primID, x, y, (int)std::floor(U + 0.5), (int)std::floor(V + 0.5)});
+ }
+#endif
+ minU = std::min(static_cast(std::floor(U - 0.5)), minU); // bilinear: the range spans floor(coord - 0.5) .. floor(coord + 0.5)
+ minV = std::min(static_cast(std::floor(V - 0.5)), minV);
+ maxU = std::max(static_cast(std::floor(U + 0.5)), maxU);
+ maxV = std::max(static_cast(std::floor(V + 0.5)), maxV);
+ }
+ else
+ {
+#if MY_DEBUG == 1
+ if (GSState::s_n == s_n_debug) // record points only for the selected draw
+ {
+ int x = (int)((e01 * p2.x + e12 * p0.x + e20 * p1.x) / (e01 + e12 + e20));
+ int y = (int)((e01 * p2.y + e12 * p0.y + e20 * p1.y) / (e01 + e12 + e20));
+ if (g_switchOrient)
+ y = -y; // undo the vertical flip applied by the reversed edge walk
+ pointsHackDebug.push_back({primID, x, y, (int)std::floor(U), (int)std::floor(V)});
+ }
+#endif
+ minU = std::min(static_cast(std::floor(U)), minU); // nearest: a single texel, floor(coord)
+ minV = std::min(static_cast(std::floor(V)), minV);
+ maxU = std::max(static_cast(std::floor(U)), maxU);
+ maxV = std::max(static_cast(std::floor(V)), maxV);
+ }
+}
+
+// Tests each of the 4 scissor corners against the triangle and folds the UVs of every covered corner into minU/minV/maxU/maxV.
+// Helps with getting the proper min/max when the triangle is scissored
+void GSState::CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
+ const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV)
+{
+ int SCAX0 = static_cast(scissor.SCAX0);
+ int SCAY0 = static_cast(scissor.SCAY0);
+ int SCAX1 = static_cast(scissor.SCAX1);
+ int SCAY1 = static_cast(scissor.SCAY1);
+
+ // Get edge function coefficients
+ EdgeFunction E01 = GetEdgeFunction(p0, p1);
+ EdgeFunction E12 = GetEdgeFunction(p1, p2);
+ EdgeFunction E20 = GetEdgeFunction(p2, p0);
+
+ for (int x : {SCAX0, SCAX1}) // the two x bounds ...
+ {
+ for (int y : {SCAY0, SCAY1}) // ... combined with the two y bounds give the four corners
+ {
+ double e01 = E01.a * x + E01.b * y + E01.c; // edge functions evaluated at the corner
+ double e12 = E12.a * x + E12.b * y + E12.c;
+ double e20 = E20.a * x + E20.b * y + E20.c;
+ if (CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20)) // corner passes the fill rule of all three edges
+ {
+ CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, minV, maxU, maxV);
+ }
+ }
+ }
+}
+
+// Walks the pixels next to the directed edge p0 -> p1 of a triangle and folds the texture
+// coordinates of every covered, unscissored pixel into minU/minV/maxU/maxV.
+//
+//   p0, p1, p2    - triangle vertices; the walk follows the edge p0 -> p1.
+//   t01, t12, t20 - fill-rule classification of the edges p0-p1, p1-p2 and p2-p0.
+//   W, H, FST, bilinear, clamp - forwarded unchanged to CalculateUV.
+//   scissor       - inclusive scissor rectangle; pixels outside it never contribute.
+//   switchOrient  - walk the same edge in the opposite direction (see below).
+//   minU .. maxV  - running range; it is only ever widened.
+//
+// A zero-length edge (p0 == p1) returns immediately: its edge function is zero everywhere,
+// so the stepping rule below could never make progress along it. If no valid start pixel
+// or step can be determined the walk also stops instead of continuing with indeterminate
+// state.
+void GSState::EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2,
+	EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
+	const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV)
+{
+	const int SCAX0 = static_cast<int>(scissor.SCAX0);
+	int SCAY0 = static_cast<int>(scissor.SCAY0);
+	const int SCAX1 = static_cast<int>(scissor.SCAX1);
+	int SCAY1 = static_cast<int>(scissor.SCAY1);
+
+#if MY_DEBUG == 1
+	g_switchOrient = switchOrient;
+#endif
+	// To traverse in the opposite direction we swap the first and second points
+	// and flip the whole triangle vertically (this is so that right-hand interior rule can still be followed).
+	// This only affects XY so doesn't matter for computing UV ranges.
+	if (switchOrient)
+	{
+		std::swap(p0, p1);
+		std::swap(t12, t20);
+		p0.y = -p0.y;
+		p1.y = -p1.y;
+		p2.y = -p2.y;
+
+		// The scissor is flipped with the triangle: (SCAY0, SCAY1) becomes (-SCAY1, -SCAY0).
+		const int flipped_y0 = -SCAY1;
+		SCAY1 = -SCAY0;
+		SCAY0 = flipped_y0;
+	}
+
+	// Get edge function coefficients
+	const EdgeFunction E01 = GetEdgeFunction(p0, p1);
+	const EdgeFunction E12 = GetEdgeFunction(p1, p2);
+	const EdgeFunction E20 = GetEdgeFunction(p2, p0);
+
+	// Initialize deltas and steps
+	const double dx = p1.x - p0.x;
+	const double dy = p1.y - p0.y;
+	if (dx == 0.0 && dy == 0.0)
+		return; // degenerate edge: nothing to walk
+
+	int sx = (dx > 0) ? 1 : -1;
+	int sy = (dy > 0) ? 1 : -1;
+
+	// Initialize starting point by rounding correctly
+	// Use right-hand-interior rule and brute force case analysis to get the correct starting point
+	// FIXME: THIS IS WRONG!!! THERE CAN REALLY BE 3 CANDIDATE POINTS
+	const double floor_x = std::floor(p0.x);
+	const double floor_y = std::floor(p0.y);
+	const double ceil_x = std::ceil(p0.x);
+	const double ceil_y = std::ceil(p0.y);
+
+	int x = 0;
+	int y = 0;
+
+	// Takes the first of two candidate pixels whose edge-function value is non-negative.
+	const auto pick_start = [&](double first_x, double first_y, double second_x, double second_y) {
+		if (E01.a * first_x + E01.b * first_y + E01.c >= 0.0)
+		{
+			x = static_cast<int>(first_x);
+			y = static_cast<int>(first_y);
+			return true;
+		}
+		if (E01.a * second_x + E01.b * second_y + E01.c >= 0.0)
+		{
+			x = static_cast<int>(second_x);
+			y = static_cast<int>(second_y);
+			return true;
+		}
+		return false;
+	};
+
+	bool have_start = true;
+	if (dy == 0.0)
+	{
+		// Horizontal edge: the vertical step direction depends on the walking direction.
+		if (dx > 0.0)
+		{
+			x = static_cast<int>(floor_x);
+			y = static_cast<int>(ceil_y);
+			sy = 1;
+		}
+		else
+		{
+			x = static_cast<int>(ceil_x);
+			y = static_cast<int>(floor_y);
+			sy = -1;
+		}
+	}
+	else if (dx == 0.0)
+	{
+		// Vertical edge: the horizontal step direction depends on the walking direction.
+		if (dy > 0.0)
+		{
+			x = static_cast<int>(floor_x);
+			y = static_cast<int>(floor_y);
+			sx = -1;
+		}
+		else
+		{
+			x = static_cast<int>(ceil_x);
+			y = static_cast<int>(ceil_y);
+			sx = 1;
+		}
+	}
+	else if ((dx > 0.0) && (dy > 0.0))
+	{
+		have_start = pick_start(ceil_x, ceil_y, floor_x, ceil_y);
+	}
+	else if ((dx < 0.0) && (dy > 0.0))
+	{
+		have_start = pick_start(floor_x, ceil_y, floor_x, floor_y);
+	}
+	else if ((dx < 0.0) && (dy < 0.0))
+	{
+		have_start = pick_start(floor_x, floor_y, ceil_x, floor_y);
+	}
+	else if ((dx > 0.0) && (dy < 0.0))
+	{
+		have_start = pick_start(ceil_x, floor_y, ceil_x, ceil_y);
+	}
+	else
+	{
+		have_start = false; // only reachable with NaN coordinates
+	}
+
+	if (!have_start)
+	{
+		// Should be unreachable
+		pxFail("Invalid edge function");
+		return;
+	}
+
+	// Initialize edge function values
+	double e01 = E01.a * x + E01.b * y + E01.c;
+	double e12 = E12.a * x + E12.b * y + E12.c;
+	double e20 = E20.a * x + E20.b * y + E20.c;
+
+	// Bounding box of the edge
+	const int minX = static_cast<int>(std::floor(std::min(p0.x, p1.x)));
+	const int minY = static_cast<int>(std::floor(std::min(p0.y, p1.y)));
+	const int maxX = static_cast<int>(std::ceil(std::max(p0.x, p1.x)));
+	const int maxY = static_cast<int>(std::ceil(std::max(p0.y, p1.y)));
+	const bool dxBigger = std::abs(dx) > std::abs(dy);
+	const bool dyBigger = std::abs(dy) > std::abs(dx);
+
+	// Single-pixel steps; all three edge functions are updated incrementally.
+	const auto step_x = [&]() {
+		x += sx;
+		e01 += E01.a * sx;
+		e12 += E12.a * sx;
+		e20 += E20.a * sx;
+	};
+	const auto step_y = [&]() {
+		y += sy;
+		e01 += E01.b * sy;
+		e12 += E12.b * sy;
+		e20 += E20.b * sy;
+	};
+
+	// Walk along the longest axis
+	while (CheckXYBounds(x, y, minX, minY, maxX, maxY, dxBigger, dyBigger)) // Check if the current point is inside the bounds of the line
+	{
+		// Update min/max UV values if the current point is inside the triangle and XY is inside the scissor
+		if (CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20) && CheckScissor(x, y, SCAX0, SCAY0, SCAX1, SCAY1))
+		{
+			CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, minV, maxU, maxV);
+
+			// FIXME: TEST WITH dx == 0 and dy == 0 optimization for
+			// horiz and vert edges!
+			if (!WALK_FULL_EDGE)
+			// if (!WALK_FULL_EDGE || dx == 0 || dy == 0)
+			{
+				// Break after the first rasterizable point. If a horizontal or vertical edge, this should be sufficient
+				// since we walk along the edge in both directions.
+				// Otherwise, this might not always be the "most outside" point but should be much faster.
+				break;
+			}
+		}
+
+		// Test the horizontal and vertical edge functions
+		const double e01x = e01 + E01.a * sx;
+		const double e01y = e01 + E01.b * sy;
+		if (!CheckEdgeFunction(e01x, t01))
+		{
+			// Cannot go horizontally, so go vertically
+			step_y();
+		}
+		else if (!CheckEdgeFunction(e01y, t01))
+		{
+			// Cannot go vertically, so go horizontally
+			step_x();
+		}
+		else if (e01x < e01y)
+		{
+			// Can go both, but horizontal is closer to the line
+			step_x();
+		}
+		else if (e01x > e01y)
+		{
+			// Can go both, but vertical is closer to the line
+			step_y();
+		}
+		else
+		{
+			// Should be unreachable. Stop walking: no step was taken, so continuing would spin forever.
+			pxFail("Invalid edge function");
+			break;
+		}
+	}
+}
+
+// Widens minU/minV/maxU/maxV by the texture coordinates of one triangle: the vertices are
+// sorted and their edges classified, the four scissor corners are tested, and then each of
+// the three edges is walked once in each direction.
+void GSState::EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp,
+	int& minU, int& minV, int& maxU, int& maxV)
+{
+	// Rearrange the points in the correct order and classify the edges between them
+	const auto [a, b, c, type_ab, type_bc, type_ca] = SortAndClassifyTriangleVerts(p0, p1, p2);
+
+	// Check if the corners of the scissor region are inside the triangle
+	CheckScissorUV(a, b, c, type_ab, type_bc, type_ca, W, H, FST, bilinear, scissor, clamp, minU, minV, maxU, maxV);
+
+	const Point corners[3] = {a, b, c};
+	const EdgeType kinds[3] = {type_ab, type_bc, type_ca};
+
+	// First pass: clockwise (if Y-axis points down). Second pass: anti-clockwise.
+	for (const bool reversed : {false, true})
+	{
+		// Start the walk from each vertex in turn, keeping the cyclic vertex/edge order.
+		for (int first = 0; first < 3; ++first)
+		{
+			const int second = (first + 1) % 3;
+			const int third = (first + 2) % 3;
+			EdgeWalkTriangleMinMaxUVImpl(corners[first], corners[second], corners[third],
+				kinds[first], kinds[second], kinds[third],
+				W, H, FST, bilinear, scissor, clamp, reversed, minU, minV, maxU, maxV);
+		}
+	}
+}
+
+// Computes the minimum and maximum UV coordinates touched by all triangles in the current vertex buffer
+// This should only be called if all vertices in the buffer are complete triangles (m_index.tail is a multiple of 3)
+// W and H are the texture width and height
+// minU, minV, maxU, maxV are outputs only: they are reset here before any triangle is processed
+// bilinear is whether or not to use bilinear interpolation (if false, nearest neighbor is used)
+void GSState::GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const
+{
+ // Get XY offset values to get XY in window coordinates
+ int OFX = static_cast(m_context->XYOFFSET.OFX);
+ int OFY = static_cast(m_context->XYOFFSET.OFY);
+
+ pxAssert((m_index.tail % 3) == 0); // should be a multiple of 3 for triangles
+
+ // Initialize min/max UV values
+ minU = std::numeric_limits::max(); // empty range: min starts at the largest value, max at the smallest
+ minV = std::numeric_limits::max();
+ maxU = std::numeric_limits::min();
+ maxV = std::numeric_limits::min();
+
+ // Iterate through each triangle and get min/max UVs
+ for (size_t tri_i = 0; tri_i < m_index.tail / 3; tri_i++)
+ {
+#if MY_DEBUG == 1
+ primID = tri_i; // remembered so CalculateUV can tag the points it logs
+ if (s_n == 58 && tri_i == 1094) // hard-coded draw/triangle pair; NOTE(review): looks like a breakpoint anchor - confirm and remove
+ {
+ printf("\n");
+ }
+#endif
+ Point verts[3];
+ for (int vert_i = 0; vert_i < 3; vert_i++)
+ {
+ Point p;
+ const int xi = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].XYZ.X);
+ const int yi = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].XYZ.Y);
+ p.x = static_cast(xi - OFX) / 16.0; // subtract the XY offset, then divide by 16
+ p.y = static_cast(yi - OFY) / 16.0;
+ if (m_draw_env->PRIM.FST)
+ {
+ p.U = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].U) / 16.0; // U/V are divided by 16 as well
+ p.V = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].V) / 16.0;
+ }
+ else
+ {
+ p.S = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].ST.S); // S, T and Q are stored as given; the division by Q happens per point in CalculateUV
+ p.T = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].ST.T);
+ p.Q = static_cast(m_vertex.buff[m_index.buff[3 * tri_i + vert_i]].RGBAQ.Q);
+ }
+ verts[vert_i] = p;
+ }
+
+#if MY_DEBUG == 1
+ if (s_n == s_n_debug) // record the input vertices of the selected draw only
+ {
+ pointsHackDebugOrig[{tri_i, 0}] = {verts[0].x, verts[0].y, verts[0].S, verts[0].T}; // NOTE(review): S/T are only assigned above when FST is false - confirm what they hold for FST draws
+ pointsHackDebugOrig[{tri_i, 1}] = {verts[1].x, verts[1].y, verts[1].S, verts[1].T};
+ pointsHackDebugOrig[{tri_i, 2}] = {verts[2].x, verts[2].y, verts[2].S, verts[2].T};
+ }
+#endif
+ EdgeWalkTriangleMinMaxUV(verts[0], verts[1], verts[2], W, H, m_draw_env->PRIM.FST, bilinear, m_context->SCISSOR, m_context->CLAMP, minU, minV, maxU, maxV);
+ }
+
+ GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU); // presumably maps the U range through the wrap mode in place - helper not visible here
+ GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV); // same for the V range
+}
+
+// Computes the minimum and maximum UV coordinates touched by all sprites in the current vertex buffer.
+// This should only be called if all vertices in the buffer are complete sprites (m_index.tail is a multiple of 2).
+// W and H are the texture width and height.
+// minU, minV, maxU, maxV are outputs only: they are reset here before any sprite is processed.
+// bilinear is whether or not to use bilinear interpolation (if false, nearest neighbor is used).
+//
+// Each sprite's pixel span is intersected with the scissor rectangle. A sprite that is empty
+// or lies completely outside the scissor contributes nothing.
+void GSState::GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const
+{
+	// Get XY offset values to get XY in window coordinates
+	const int OFX = static_cast<int>(m_context->XYOFFSET.OFX);
+	const int OFY = static_cast<int>(m_context->XYOFFSET.OFY);
+
+	// Scissor rectangle (inclusive); identical for every sprite of the draw
+	const int SCAX0 = static_cast<int>(m_context->SCISSOR.SCAX0);
+	const int SCAY0 = static_cast<int>(m_context->SCISSOR.SCAY0);
+	const int SCAX1 = static_cast<int>(m_context->SCISSOR.SCAX1);
+	const int SCAY1 = static_cast<int>(m_context->SCISSOR.SCAY1);
+
+	const bool fst = m_draw_env->PRIM.FST;
+
+	pxAssert((m_index.tail % 2) == 0); // should be a multiple of 2 for sprites
+
+	// Initialize min/max UV values
+	minU = std::numeric_limits<int>::max();
+	minV = std::numeric_limits<int>::max();
+	maxU = std::numeric_limits<int>::min();
+	maxV = std::numeric_limits<int>::min();
+
+	// Folds one interpolated texture coordinate into a running [lo, hi] range.
+	// Bilinear sampling covers floor(coord - 0.5) .. floor(coord + 0.5); nearest covers floor(coord).
+	const auto accumulate = [bilinear](double coord, int& lo, int& hi) {
+		if (bilinear)
+		{
+			lo = std::min(static_cast<int>(std::floor(coord - 0.5)), lo);
+			hi = std::max(static_cast<int>(std::floor(coord + 0.5)), hi);
+		}
+		else
+		{
+			const int texel = static_cast<int>(std::floor(coord));
+			lo = std::min(texel, lo);
+			hi = std::max(texel, hi);
+		}
+	};
+
+	// Iterate through each sprite and get min/max UVs
+	for (size_t sprite_i = 0; sprite_i < m_index.tail / 2; sprite_i++)
+	{
+		Point verts[2];
+		for (int vert_i = 0; vert_i < 2; vert_i++)
+		{
+			const auto& vtx = m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]];
+			Point& p = verts[vert_i];
+
+			const int xi = static_cast<int>(vtx.XYZ.X);
+			const int yi = static_cast<int>(vtx.XYZ.Y);
+			p.x = static_cast<double>(xi - OFX) / 16.0;
+			p.y = static_cast<double>(yi - OFY) / 16.0;
+			if (fst)
+			{
+				p.U = static_cast<double>(vtx.U) / 16.0;
+				p.V = static_cast<double>(vtx.V) / 16.0;
+			}
+			else
+			{
+				// Pre-scale S/T by the texture size; the division by Q follows below.
+				p.S = static_cast<double>(vtx.ST.S) * W;
+				p.T = static_cast<double>(vtx.ST.T) * H;
+				p.Q = static_cast<double>(vtx.RGBAQ.Q);
+			}
+		}
+
+		if (!fst)
+		{
+			// For sprites, always use the second Q value
+			const double q = verts[1].Q;
+			for (Point& p : verts)
+			{
+				p.U = p.S / q;
+				p.V = p.T / q;
+			}
+		}
+
+		const double x0 = std::min(verts[0].x, verts[1].x);
+		const double y0 = std::min(verts[0].y, verts[1].y);
+		const double x1 = std::max(verts[0].x, verts[1].x);
+		const double y1 = std::max(verts[0].y, verts[1].y);
+
+		// Texture coordinates at the min/max corner, chosen per axis
+		const double U0 = verts[0].x == x0 ? verts[0].U : verts[1].U;
+		const double V0 = verts[0].y == y0 ? verts[0].V : verts[1].V;
+		const double U1 = verts[0].x == x1 ? verts[0].U : verts[1].U;
+		const double V1 = verts[0].y == y1 ? verts[0].V : verts[1].V;
+
+		// First and last covered pixel on each axis
+		int x_first = static_cast<int>(std::ceil(x0));
+		int x_last = static_cast<int>(std::floor(x1));
+		int y_first = static_cast<int>(std::ceil(y0));
+		int y_last = static_cast<int>(std::floor(y1));
+		if (std::floor(x1) == x1) // omit right edges
+			x_last--;
+		if (std::floor(y1) == y1) // omit bottom edges
+			y_last--;
+
+		// Scissoring: intersect the span with the scissor rectangle. Clamping both ends into
+		// the rectangle instead would turn an empty or fully clipped span into a non-empty one.
+		x_first = std::max(x_first, SCAX0);
+		x_last = std::min(x_last, SCAX1);
+		y_first = std::max(y_first, SCAY0);
+		y_last = std::min(y_last, SCAY1);
+
+		if (x_first > x_last || y_first > y_last)
+			continue;
+
+		// A non-empty span implies x0 < x1 and y0 < y1, so the divisions below are safe.
+		// The interpolation is linear, so the extremes occur at the two ends of the span.
+		for (const int x : {x_first, x_last})
+			accumulate(((x1 - x) * U0 + (x - x0) * U1) / (x1 - x0), minU, maxU);
+		for (const int y : {y_first, y_last})
+			accumulate(((y1 - y) * V0 + (y - y0) * V1) / (y1 - y0), minV, maxV);
+	}
+
+	GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU);
+	GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV);
+}
+
static __fi bool IsAutoFlushEnabled()
{
return GSIsHardwareRenderer() ? (GSConfig.UserHacks_AutoFlush != GSHWAutoFlushLevel::Disabled) : GSConfig.AutoFlushSW;
@@ -1675,6 +2439,7 @@ void GSState::FlushPrim()
}
#endif
+ // TODO: Put the accurate UV calculation here?
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// Texel coordinate rounding
@@ -3716,16 +4481,117 @@ __forceinline void GSState::VertexKick(u32 skip)
Flush(VERTEXCOUNT);
}
+// FIXME: Replace the old UsesRegionRepeat() with this.
+// Computes min_out/max_out, the bounds of the region repeat function applied to every x in [ min .. max ] (inclusive).
+// The region repeat function is f(x) = (x & MSK) | FIX.
+// Returns true if f(x) != x for at least one x in [ min .. max ].
+bool GSState::GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out)
+{
+ // If the range crosses from -1 to 0, handle [ min .. -1 ] and [ 0 .. max ] separately and merge the results,
+ // as the below algorithm only works if min <= max as unsigned integers.
+ if (min < 0 && 0 <= max)
+ {
+ int min_out_1, max_out_1, min_out_2, max_out_2;
+ const bool modified_1 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, -1, min_out_1, max_out_1);
+ const bool modified_2 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, 0, max, min_out_2, max_out_2);
+ min_out = std::min(min_out_1, min_out_2);
+ max_out = std::max(max_out_1, max_out_2);
+ return modified_1 || modified_2;
+ }
+
+ const int cleared_bits = ~MSK & ~FIX; // Bits that are always cleared by applying msk and fix
+ const int set_bits = FIX; // Bits that are always set by applying msk and fix
+ unsigned long msb; // Receives the index of the highest set bit from _BitScanReverse
+ int variable_bits = min ^ max; // Bits in which min and max differ
+ if (_BitScanReverse(&msb, variable_bits))
+ variable_bits |= (1 << msb) - 1; // Fill in all lower bits
+
+ const int always_set = min & ~variable_bits; // Bits that are set in every value in min...max
+ const int sometimes_set = min | variable_bits; // Bits that are set in at least one value in min...max
+
+ const bool sets_bits = (set_bits | always_set) != always_set; // At least one bit in min...max is set by applying msk and fix
+ const bool clears_bits = (cleared_bits & sometimes_set) != 0; // At least one bit in min...max is cleared by applying msk and fix
+
+ const int overwritten_variable_bits = (cleared_bits | set_bits) & variable_bits; // Variable bits whose input value never reaches the output
+ // A variable bit that's `0` in `min` will at some point switch to a `1` (because it's variable)
+ // When it does, all bits below it will switch to a `0` (that's how incrementing works)
+ // If the 0 to 1 switch is reflected in the final output (not masked and not replaced by a fixed value),
+ // the final value would be larger than the previous. Otherwise, the final value will be less.
+ // The true minimum value is `min` with all bits below the most significant replaced variable `0` bit cleared
+ const int min_overwritten_variable_zeros = ~min & overwritten_variable_bits;
+ if (_BitScanReverse(&msb, min_overwritten_variable_zeros))
+ min &= (~0u << msb);
+ // Same idea for max, using the most significant overwritten variable `1` bit: set every bit below it
+ const int max_overwritten_variable_ones = max & overwritten_variable_bits;
+ if (_BitScanReverse(&msb, max_overwritten_variable_ones))
+ max |= (1 << msb) - 1;
+
+ min_out = (MSK & min) | FIX; // Apply f() to the adjusted endpoints
+ max_out = (MSK & max) | FIX;
+
+ return sets_bits || clears_bits;
+}
+
+// Get the min/max texel coordinate (U or V) assuming it takes the values min .. max and is then
+// wrapped/clamped according to the mode WM.
+// SIZE: Width/height of texture (power of 2)
+// MIN/MAX: Either the clamping range (in REGION_CLAMP mode) or the MSK/FIX parameters (in REGION_REPEAT mode)
+// Returns true if any of the values are changed. I.e., if f(x) is the mapping function for clamp/wrap mode,
+// return true if f(x) != x for some x in [ min .. max ]. min/max are taken by value, so min_out/max_out may alias them.
+bool GSState::GetClampWrapMinMaxUV(int SIZE, int WM, int MIN, int MAX, int min, int max, int& min_out, int& max_out)
+{
+ const int MSK = MIN; // In REGION_REPEAT mode the MIN/MAX arguments carry MSK/FIX
+ const int FIX = MAX;
+
+ if (WM == CLAMP_REPEAT)
+ {
+ // If we cross the SIZE boundary then we always get the largest/smallest possible wrapped value
+ if ((min & ~(SIZE - 1)) != (max & ~(SIZE - 1)))
+ {
+ min_out = 0;
+ max_out = SIZE - 1;
+ }
+ else
+ {
+ min_out = min & (SIZE - 1);
+ max_out = max & (SIZE - 1);
+ }
+ return min < 0 || SIZE - 1 < max; // Wrapping alters exactly the values outside [ 0 .. SIZE - 1 ]
+ }
+ else if (WM == CLAMP_CLAMP)
+ {
+ min_out = std::max(0, std::min(SIZE - 1, min));
+ max_out = std::max(0, std::min(SIZE - 1, max));
+ return min < 0 || SIZE - 1 < max; // Clamping alters exactly the values outside [ 0 .. SIZE - 1 ]
+ }
+ else if (WM == CLAMP_REGION_CLAMP)
+ {
+ min_out = std::max(MIN, std::min(MAX, min));
+ max_out = std::max(MIN, std::min(MAX, max));
+ return min < MIN || MAX < max; // Clamping alters exactly the values outside [ MIN .. MAX ]
+ }
+ else if (WM == CLAMP_REGION_REPEAT)
+ {
+ return GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, max, min_out, max_out);
+ }
+ else
+ {
+ pxAssertMsg(false, "Invalid clamp/wrap mode");
+ return false;
+ }
+}
+
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
/// Also calculates the real min and max values seen after applying the region repeat to all values in min...max
-static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
+/// FIXME: CHANGE MEMBER NAMES AND MAKE SURE CALLS HAVE ARGS IN THE RIGHT ORDER!!!
+bool GSState::UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
{
if ((min < 0) != (max < 0))
{
// Algorithm doesn't work properly if bits overflow when incrementing (happens on the -1 → 0 crossing)
// Conveniently, crossing zero guarantees you use the full range
*min_out = fix;
- *max_out = (fix | msk) + 1;
+ *max_out = fix | msk;
return true;
}
@@ -3757,7 +4623,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i
max |= (1 << msb) - 1;
*min_out = (msk & min) | fix;
- *max_out = ((msk & max) | fix) + 1;
+ *max_out = (msk & max) | fix;
return sets_bits || clears_bits;
}
diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h
index 94f9a5442b..773566c78f 100644
--- a/pcsx2/GS/GSState.h
+++ b/pcsx2/GS/GSState.h
@@ -22,6 +22,9 @@ public:
virtual ~GSState();
static constexpr int GetSaveStateSize();
+ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out);
+ static bool GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out);
+ static bool GetClampWrapMinMaxUV(int SIZE, int WM, int MSK, int FIX, int min, int max, int& min_out, int& max_out);
private:
// RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there)
@@ -361,6 +364,76 @@ public:
void CalculateDisplayOffset(bool scanmask);
} PCRTCDisplays;
+public:
+ struct Point // A point with x/y coordinates plus, presumably, texture coordinates (U/V or S/T/Q); all fields are doubles.
+ {
+ double x, y; // Default to 0 in the constructor.
+ union // U and S share the same storage, so only one of the two names is meaningful at a time.
+ {
+ double U;
+ double S;
+ };
+ union // T and V likewise share the same storage.
+ {
+ double T;
+ double V;
+ };
+ double Q; // NOTE(review): presumably the divisor paired with S/T — confirm against InterpolateEdgeFunctionsSTQ.
+
+ Point(double x = 0, double y = 0, double US = NAN, double VT = NAN, double Q = NAN) // US, VT and Q default to NaN.
+ : x(x)
+ , y(y)
+ , U(US) // Initializes the U/S union.
+ , V(VT) // Initializes the T/V union.
+ , Q(Q)
+ {
+ }
+ };
+
+ struct EdgeFunction // Holds three coefficients. NOTE(review): presumably e(x, y) = a*x + b*y + c — confirm in GetEdgeFunction.
+ {
+ double a, b, c;
+
+ EdgeFunction(double a, double b, double c) // Stores the three arguments unchanged.
+ : a(a)
+ , b(b)
+ , c(c)
+ {
+ }
+ };
+
+ enum class EdgeType // Edge classification taken by CheckEdgeFunction(). NOTE(review): presumably selects whether pixels exactly on an edge are drawn — confirm.
+ {
+ TOP,
+ RIGHT,
+ LEFT,
+ BOTTOM
+ };
+
+public:
+ static EdgeFunction GetEdgeFunction(const Point& a, const Point& b);
+ static bool CheckEdgeFunction(double e, EdgeType edgeType);
+ static std::tuple InterpolateEdgeFunctionsUV(
+ double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2);
+ static std::tuple InterpolateEdgeFunctionsSTQ(
+ double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2);
+ static bool CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger);
+ static std::tuple SortPoints(Point p0, Point p1, Point p2);
+ static __forceinline bool CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1);
+ static std::tuple SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2);
+ static std::tuple CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp);
+ static void CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2,
+ const Point& p0, const Point& p1, const Point& p2,
+ int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV);
+ static void CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
+ const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV);
+ static void EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
+ const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV);
+ static void EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp,
+ int& minU, int& minV, int& maxU, int& maxV);
+ void GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const;
+ void GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const;
+
public:
/// Returns the appropriate directory for draw dumping.
static std::string GetDrawDumpPath(const char* format, ...);
diff --git a/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp b/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp
index 4b9e5e5801..f7ebd4b60b 100644
--- a/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp
+++ b/pcsx2/GS/Renderers/Common/GSVertexTraceFMM.cpp
@@ -183,6 +183,14 @@ void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u
for (; i < (count - 1); i += 2) // 2x loop unroll
{
processVertices(v[index[i + 0]], v[index[i + 1]], true);
+ if (GSState::s_n == 58)
+ {
+ fprintf(stderr, "%d: %f %f %f %f\n", i, tmin.x, tmin.y, tmax.x, tmax.y);
+ }
+ if (GSState::s_n == 58 && i == 164)
+ {
+ fprintf(stderr, "hi\n");
+ }
}
if (count & 1)
{
diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp
index e6fa605090..c1b1b1d380 100644
--- a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp
+++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp
@@ -10,8 +10,21 @@
#include
+#include "debug.h"
+#if MY_DEBUG == 1
+#include
+
+extern bool savePoints;
+extern std::map> pointsHackRange;
+extern std::map> pointsSWRange;
+extern std::vector> pointsHackDebug;
+extern std::vector> pointsSWDebug;
+extern int s_n_debug;
+extern int* primIDSW;
+#else
// Comment to disable all dynamic code generation.
#define ENABLE_JIT_RASTERIZER
+#endif
#if MULTI_ISA_COMPILE_ONCE
// Lack of a better home
@@ -627,8 +640,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
+#if MY_DEBUG == 1
+ int x = left - 4;
+#endif
while (1)
{
+#if MY_DEBUG == 1
+ x += 4;
+#endif
+ if (global.s_n == 58 && top == 188 && (*primIDSW - 1 == 1094))
+ {
+ printf("");
+ }
do
{
int fa = 0, za = 0;
@@ -712,6 +735,33 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
case ZTST_GREATER: test |= zso <= zdo; break;
}
+#if MY_DEBUG == 1
+ if (savePoints)
+ {
+ for (int i = 0; i < 4; i++)
+ {
+ if (!pointsSWRange.contains(global.s_n))
+ {
+ pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000};
+ }
+ if (global.s_n == s_n_debug)
+ {
+ if (sel.ltf)
+ {
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
+ }
+ else
+ {
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
+ }
+ }
+ }
+ }
+#endif
+
if (test.alltrue())
continue;
}
@@ -1119,6 +1169,48 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
uv1 = clamp.blend8(repeat, VectorI::broadcast128(global.t.mask));
}
+#if MY_DEBUG == 1
+ if (savePoints)
+ {
+ for (int i = 0; i < 4; i++)
+ {
+ if (test.U32[i] == 0 || sel.notest)
+ {
+ if (!pointsSWRange.contains(global.s_n))
+ {
+ pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000};
+ }
+ std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv0.U16[i]);
+ std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]);
+ std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv0.U16[i]);
+ std::get<3>(pointsSWRange[global.s_n]) = std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]);
+ if (sel.ltf)
+ {
+ std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv1.U16[i]);
+ std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]);
+ std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv1.U16[i]);
+ std::get<3>(pointsSWRange[global.s_n]) = std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]);
+ }
+ if (global.s_n == s_n_debug)
+ {
+ if (sel.ltf)
+ {
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv1.U16[i + 4]});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv0.U16[i + 4]});
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv1.U16[i + 4]});
+ }
+ else
+ {
+ pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]});
+ }
+ }
+
+ }
+ }
+ }
+#endif
+
VectorI y0 = uv0.uph16() << (sel.tw + 3);
VectorI x0 = uv0.upl16();
diff --git a/pcsx2/GS/Renderers/SW/GSRasterizer.cpp b/pcsx2/GS/Renderers/SW/GSRasterizer.cpp
index dda8409316..c045beed1f 100644
--- a/pcsx2/GS/Renderers/SW/GSRasterizer.cpp
+++ b/pcsx2/GS/Renderers/SW/GSRasterizer.cpp
@@ -19,6 +19,21 @@ MULTI_ISA_UNSHARED_IMPL;
int GSRasterizerData::s_counter = 0;
+#include "debug.h"
+#if MY_DEBUG == 1
+extern bool savePoints;
+extern int s_n_debug;
+extern int s_n_exit;
+extern int primID;
+extern int* primIDSW;
+extern std::map> pointsHackRange;
+extern std::map> pointsSWRange;
+extern std::vector> pointsHackDebug;
+extern std::vector> pointsSWDebug;
+extern std::map, std::tuple> pointsHackDebugOrig;
+extern std::map, std::tuple> pointsSWDebugOrig;
+#endif
+
static int compute_best_thread_height(int threads)
{
// - for more threads screen segments should be smaller to better distribute the pixels
@@ -56,6 +71,10 @@ GSRasterizer::GSRasterizer(GSDrawScanline* ds, int id, int threads)
{
m_scanline[i] = (i % threads) == id ? 1 : 0;
}
+
+#if MY_DEBUG == 1
+ primIDSW = &m_primcount;
+#endif
}
GSRasterizer::~GSRasterizer()
@@ -597,6 +616,19 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
{
+#if MY_DEBUG == 1
+ if (GSState::s_n == s_n_debug)
+ {
+ // FIXME; WHAT IS SCALING FOR UV?
+ double scaleTX = 1 / (double)(1 << m_local.gd->TW) / 256.0 / 256.0;
+ double scaleTY = 1 / (double)(1 << m_local.gd->TH) / 256.0 / 256.0;
+ pointsSWDebugOrig[{m_primcount, 0}] = {vertex[index[0]].p.x, vertex[index[0]].p.y, vertex[index[0]].t.x * scaleTX, vertex[index[0]].t.y * scaleTY};
+ pointsSWDebugOrig[{m_primcount, 1}] = {vertex[index[1]].p.x, vertex[index[1]].p.y, vertex[index[1]].t.x * scaleTX, vertex[index[1]].t.y * scaleTY};
+ pointsSWDebugOrig[{m_primcount, 2}] = {vertex[index[2]].p.x, vertex[index[2]].p.y, vertex[index[2]].t.x * scaleTX, vertex[index[2]].t.y * scaleTY};
+ }
+#endif
+
+
m_primcount++;
GSVertexSW edge;
diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp
index 01662d605c..f09b6038be 100644
--- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp
+++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp
@@ -10,8 +10,25 @@
MULTI_ISA_UNSHARED_IMPL;
+#define USE_HACK 1
+
+#include "debug.h"
+
+#if MY_DEBUG == 1
+extern bool savePoints;
+extern int s_n_debug;
+extern int s_n_exit;
+extern std::map> pointsHackRange;
+extern std::map> pointsSWRange;
+extern void dumpRanges();
+#endif
+
GSRenderer* CURRENT_ISA::makeGSRendererSW(int threads)
{
+#if MY_DEBUG == 1
+ if (savePoints) // Debug builds only: when point capture is enabled, ignore the requested thread count.
+ threads = 0; // NOTE(review): presumably makes rasterization run without worker threads so the capture globals are not raced — confirm.
+#endif
return new GSRendererSW(threads);
}
@@ -431,8 +448,6 @@ void GSRendererSW::Draw()
sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm());
- //
-
if (GSConfig.DumpGSData)
{
Sync(2);
@@ -556,6 +571,11 @@ void GSRendererSW::Queue(GSRingHeap::SharedPtr& item)
fflush(s_fp);
}
+#if MY_DEBUG == 1
+ sd->global.s_n = s_n;
+ sd->global.TW = m_context->TEX0.TW;
+ sd->global.TH = m_context->TEX0.TH;
+#endif
m_rl->Queue(item);
// invalidate new parts rendered onto
@@ -1057,7 +1077,54 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);
- GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
+ GSVector4i r;
+#if MY_DEBUG == 1
+ fprintf(stderr, "%d\n", s_n);
+#endif
+ if (m_vt.m_primclass == GS_TRIANGLE_CLASS && USE_HACK)
+ {
+ int minU = std::numeric_limits::max();
+ int minV = std::numeric_limits::max();
+ int maxU = std::numeric_limits::min();
+ int maxV = std::numeric_limits::min();
+ //GetTriangleMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
+ GetTriangleMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
+
+ r = GSVector4i(minU, minV, maxU + 1, maxV + 1);
+#if MY_DEBUG == 1
+ if (s_n == 58)
+ {
+ printf("");
+ }
+ GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
+ if (savePoints)
+ {
+ pointsHackRange[s_n] = {minU, minV, maxU, maxV};
+ }
+#endif
+ }
+ else if (m_vt.m_primclass == GS_SPRITE_CLASS && USE_HACK)
+ {
+ int minU = std::numeric_limits::max();
+ int minV = std::numeric_limits::max();
+ int maxU = std::numeric_limits::min();
+ int maxV = std::numeric_limits::min();
+ //GetSpriteMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
+ GetSpriteMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
+
+ r = GSVector4i(minU, minV, maxU + 1, maxV + 1);
+#if MY_DEBUG == 1
+ GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
+ if (savePoints)
+ {
+ pointsHackRange[s_n] = {minU, minV, maxU, maxV};
+ }
+#endif
+ }
+ else
+ {
+ r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
+ }
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);
diff --git a/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h b/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h
index 9ae55a2ab9..a823187e2d 100644
--- a/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h
+++ b/pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h
@@ -9,6 +9,8 @@
#include
#include
+#include "debug.h"
+
union GSScanlineSelector
{
struct
@@ -117,6 +119,12 @@ union GSScanlineSelector
struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a pixel shader constant buffer
{
+ // TODO: REMOVE DEBUG CODE
+#if MY_DEBUG == 1
+ int s_n = -1;
+ int TW = -1, TH = -1;
+#endif
+
GSScanlineSelector sel;
// - the data of vm, tex may change, multi-threaded drawing must be finished before that happens, clut and dimx are copies
diff --git a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp
index 260e9930e9..5cce41cd46 100644
--- a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp
+++ b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp
@@ -215,7 +215,8 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
GSVector4i r = rect;
- r = r.ralign(bs);
+ // FIXME: Is this an ok rintersect hack to make sure we don't read outside the texture?
+ r = r.ralign(bs).rintersect(GSVector4i(0, 0, tw, th));
if (r.eq(GSVector4i(0, 0, tw, th)))
{
diff --git a/pcsx2/debug.h b/pcsx2/debug.h
new file mode 100644
index 0000000000..7c5a473b65
--- /dev/null
+++ b/pcsx2/debug.h
@@ -0,0 +1,2 @@
+#pragma once
+#define MY_DEBUG 1
\ No newline at end of file
diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj
index 1d2efe083b..41e17c4f60 100644
--- a/pcsx2/pcsx2.vcxproj
+++ b/pcsx2/pcsx2.vcxproj
@@ -592,6 +592,7 @@
+
@@ -1025,4 +1026,4 @@
-
+
\ No newline at end of file
diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters
index e72297d4b7..75f1a11dc0 100644
--- a/pcsx2/pcsx2.vcxproj.filters
+++ b/pcsx2/pcsx2.vcxproj.filters
@@ -220,15 +220,6 @@
{eb697f5b-85f5-424a-a7e4-8d8b73d3426e}
-
- {9153e32b-e1e3-49ac-b490-b56adfd1692f}
-
-
- {03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938}
-
-
- {78c9db9c-9c7c-4385-90e7-9fa71b922f60}
-
{e068b724-9319-42e5-9ea7-63d80989ea1d}
@@ -283,12 +274,21 @@
{8aea3ae6-9722-463a-94ac-34f3738a3153}
-
- {cf847f4e-744e-4c27-a7ac-8564726fb4e6}
-
{cd8ec519-2196-43f7-86de-7faced2d4296}
+
+ {9153e32b-e1e3-49ac-b490-b56adfd1692f}
+
+
+ {03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938}
+
+
+ {78c9db9c-9c7c-4385-90e7-9fa71b922f60}
+
+
+ {cf847f4e-744e-4c27-a7ac-8564726fb4e6}
+
@@ -1119,19 +1119,19 @@
System\Ps2\GS\Renderers\Direct3D12
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording\Utilities
+ System\Tools\Input Recording\Utilities
System\Ps2\USB
@@ -1233,7 +1233,7 @@
System\Ps2\SPU2
- Tools
+ System\Tools
System\Ps2\GS\Renderers\Direct3D11
@@ -1302,7 +1302,7 @@
Misc
- Tools
+ System\Tools
Misc\Input
@@ -1329,7 +1329,7 @@
Misc
- Tools
+ System\Tools
Misc
@@ -1432,7 +1432,7 @@
System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64
- Tools\arm64
+ System\Tools\arm64
System\Ps2\EmotionEngine\EE\Dynarec\arm64
@@ -2059,19 +2059,19 @@
System\Ps2\GS\Renderers\Direct3D12
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording
+ System\Tools\Input Recording
- Tools\Input Recording\Utilities
+ System\Tools\Input Recording\Utilities
System\Include
@@ -2191,7 +2191,7 @@
System\Ps2\IPU
- Tools
+ System\Tools
System\Ps2\GS\GIF
@@ -2391,7 +2391,7 @@
System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64
- Tools\arm64
+ System\Tools\arm64
System\Ps2\Iop\SIO\PAD
@@ -2399,6 +2399,9 @@
System\Ps2\Iop\SIO\PAD
+
+ System\Ps2\GS
+
@@ -2428,4 +2431,4 @@
System\Ps2\GS
-
+
\ No newline at end of file