GS: UV calculation using triangle edge rasterization.

This commit is contained in:
TJnotJT 2025-01-14 20:00:49 -05:00
parent f509fb6950
commit ca161224eb
14 changed files with 1231 additions and 48 deletions

View File

@ -52,10 +52,10 @@
<!-- MSVC automatically adds __AVX__ and __AVX2__ appropriately -->
<PreprocessorDefinitions Condition="'$(Platform)'=='x64'">_M_X86;__SSE4_1__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet Condition="!$(Configuration.Contains(AVX2)) Or $(Configuration.Contains(Clang))">NotSet</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="$(Configuration.Contains(AVX2)) And !$(Configuration.Contains(Clang))">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Platform)'=='ARM64' Or !$(Configuration.Contains(AVX2))">NotSet</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Platform)'=='x64' And $(Configuration.Contains(AVX2))">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<!-- Allow SSE4 intrinsics on non-AVX Clang-cl builds -->
<AdditionalOptions Condition="'$(Platform)'=='x64' And $(Configuration.Contains(Clang)) And !$(Configuration.Contains(AVX2))"> -march=nehalem %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Platform)'=='x64' And $(Configuration.Contains(Clang)) And $(Configuration.Contains(AVX2))"> -march=haswell %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Platform)'=='ARM64' And $(Configuration.Contains(Clang))"> -march=armv8.4-a %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="!$(Configuration.Contains(Clang))">%(AdditionalOptions) /Zc:externConstexpr /Zc:__cplusplus /Zo /utf-8</AdditionalOptions>

View File

@ -45,6 +45,12 @@
#include "pcsx2/VMManager.h"
#include "svnrev.h"
#include "debug.h"
#if MY_DEBUG == 1
#include <cstdlib>
extern bool savePoints;
#endif
namespace GSRunner
{
@ -141,6 +147,20 @@ bool GSRunner::InitializeConfig()
si.SetStringValue("MemoryCards", fmt::format("Slot{}_Filename", i + 1).c_str(), "");
}
#if MY_DEBUG == 1
if (false)
{
si.SetBoolValue("EmuCore/GS", "dump", true);
si.SetIntValue("EmuCore/GS", "saven", 0);
si.SetIntValue("EmuCore/GS", "savel", 100);
si.SetBoolValue("EmuCore/GS", "save", true);
si.SetBoolValue("EmuCore/GS", "savef", true);
si.SetBoolValue("EmuCore/GS", "savet", true);
si.SetBoolValue("EmuCore/GS", "savez", true);
si.SetStringValue("EmuCore/GS", "HWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug");
si.SetStringValue("EmuCore/GS", "SWDumpDirectory", "C:\\Users\\tchan\\Desktop\\ps2_debug");
}
#endif
VMManager::Internal::LoadStartupSettings();
return true;
}
@ -857,8 +877,16 @@ LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
return DefWindowProcW(hwnd, msg, wParam, lParam);
}
#if MY_DEBUG == 1
extern void dumpRanges();
#endif
int wmain(int argc, wchar_t** argv)
{
#if MY_DEBUG == 1
if (savePoints)
atexit(dumpRanges);
#endif
std::vector<std::string> u8_args;
u8_args.reserve(static_cast<size_t>(argc));
for (int i = 0; i < argc; i++)

View File

@ -5,7 +5,11 @@
#include "GS/GSGL.h"
#include "GS/GS.h"
#include "GS/GSUtil.h"
#include "GS/GSState.h"
// FIXME: RENAME THIS FUNCTION AND CHANGE ARGS NAMES TO BE NICER!
// MAKE SURE BEING CALLED WITH ARGS IN THE RIGHT ORDER!
static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
{
// return max possible texcoord.
@ -38,10 +42,8 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
{
// REGION_REPEAT adheres to the original texture size, even if offset outside the texture (with MAXUV).
minuv &= limit;
if (tl < 0)
uv = minuv | maxuv; // wrap around, just use (any & mask) | fix.
else
uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask).
int ignore;
GSState::UsesRegionRepeat(maxuv, minuv, tl, br, &ignore, &uv);
}
return uv;
@ -130,18 +132,18 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
if (tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less
{
tw = reduce(uv.x, tw);
th = reduce(uv.y, th);
tw = reduce(uv.x + 1, tw);
th = reduce(uv.y + 1, th);
}
if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
{
tw = extend(uv.x, tw);
tw = extend(uv.x + 1, tw);
}
if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
{
th = extend(uv.y, th);
th = extend(uv.y + 1, th);
}
GIFRegTEX0 res = TEX0;

View File

@ -18,10 +18,774 @@
#include <iomanip>
#include <bit>
#include "debug.h"
// When non-zero, EdgeWalkTriangleMinMaxUVImpl keeps walking an edge after the first rasterizable
// point; when zero it breaks out of its walk loop as soon as one such point has been processed.
#define WALK_FULL_EDGE 1
// Definitions of GSState's static counters.
int GSState::s_n = 0;
int GSState::s_last_transfer_draw_n = 0;
int GSState::s_transfer_n = 0;
#if MY_DEBUG == 1
// ---- Debug-only state (compiled only when MY_DEBUG == 1) ----
// Read in wmain to decide whether dumpRanges() is registered with atexit.
bool savePoints = false;
// Value of GSState::s_n for which per-point debug data is recorded; also used in the dump file names.
int s_n_debug = -1;
// NOTE(review): not referenced in the visible code - presumably a draw number at which to exit; confirm.
int s_n_exit = -1;
// Index of the triangle currently being processed by GetTriangleMinMaxUV; stored with each logged point.
int primID = 0;
// NOTE(review): not referenced in the visible code - presumably the software renderer's counterpart of primID; confirm.
int* primIDSW = 0;
// Per-key 4-int ranges written by dumpRanges() to pointsHackRange.txt / pointsSWRange.txt and compared there.
// NOTE(review): neither map is filled in the visible code; the key is presumably a draw number - confirm.
std::map<int, std::tuple<int, int, int, int>> pointsHackRange;
std::map<int, std::tuple<int, int, int, int>> pointsSWRange;
// Logged points as {prim, x, y, u, v}. pointsHackDebug is appended to by CalculateUV.
// NOTE(review): pointsSWDebug is not filled in the visible code.
std::vector<std::tuple<int, int, int, int, int>> pointsHackDebug;
std::vector<std::tuple<int, int, int, int, int>> pointsSWDebug;
// Original vertex data keyed by {primitive index, vertex index}. GetTriangleMinMaxUV stores {x, y, S, T} here.
// NOTE(review): pointsSWDebugOrig is not filled in the visible code.
std::map<std::tuple<int, int>, std::tuple<double, double, double, double>> pointsHackDebugOrig;
std::map<std::tuple<int, int>, std::tuple<double, double, double, double>> pointsSWDebugOrig;
// Sorts logged debug points {prim, x, y, u, v} in ascending lexicographic order of
// (prim, y, x, u, v) - i.e. row-major within each primitive.
void sortPoints(std::vector<std::tuple<int, int, int, int, int>>& v)
{
	using DebugPoint = std::tuple<int, int, int, int, int>;

	// Reorders the fields so that plain tuple comparison yields the desired ordering.
	const auto sort_key = [](const DebugPoint& p) {
		return std::make_tuple(std::get<0>(p), std::get<2>(p), std::get<1>(p), std::get<3>(p), std::get<4>(p));
	};

	std::sort(v.begin(), v.end(), [&sort_key](const DebugPoint& lhs, const DebugPoint& rhs) {
		return sort_key(lhs) < sort_key(rhs);
	});
}
// Debug-only dump routine (registered with atexit by wmain when savePoints is set).
// Writes the collected range maps, per-point logs and original vertex data for both the
// "Hack" and "SW" paths to text files in a fixed developer directory, then reports every
// key whose Hack and SW ranges disagree.
//
// A file that cannot be opened is reported on stderr and skipped; the remaining dumps still run.
void dumpRanges()
{
	// Opens a dump file for writing; returns null (after reporting) when that fails.
	const auto open_log = [](const char* path) -> FILE* {
		FILE* f = fopen(path, "w");
		if (!f)
			fprintf(stderr, "dumpRanges: failed to open '%s' for writing\n", path);
		return f;
	};

	// One line per entry: key,x,y,u,v
	const auto write_ranges = [&open_log](const char* path, const std::map<int, std::tuple<int, int, int, int>>& ranges) {
		FILE* f = open_log(path);
		if (!f)
			return;
		for (const auto& [key, range] : ranges)
		{
			const auto& [x, y, u, v] = range;
			fprintf(f, "%d,%d,%d,%d,%d\n", key, x, y, u, v);
		}
		fclose(f);
	};

	// One line per logged point: prim,x,y,u,v
	const auto write_points = [&open_log](const char* path, const std::vector<std::tuple<int, int, int, int, int>>& points) {
		FILE* f = open_log(path);
		if (!f)
			return;
		for (const auto& [n, x, y, u, v] : points)
			fprintf(f, "%d,%d,%d,%d,%d\n", n, x, y, u, v);
		fclose(f);
	};

	// One line per original vertex: prim,vert,x,y,u,v followed by a tag naming the source path
	const auto write_orig = [&open_log](const char* path,
								const std::map<std::tuple<int, int>, std::tuple<double, double, double, double>>& verts,
								const char* tag) {
		FILE* f = open_log(path);
		if (!f)
			return;
		for (const auto& [key, data] : verts)
		{
			const auto& [prim_id, vert_id] = key;
			const auto& [x, y, u, v] = data;
			fprintf(f, "%d,%d,%f,%f,%f,%f %s\n", prim_id, vert_id, x, y, u, v, tag);
		}
		fclose(f);
	};

	char fileName[1024];

	write_ranges("C:\\Users\\tchan\\Desktop\\log_files\\pointsHackRange.txt", pointsHackRange);

	snprintf(fileName, sizeof(fileName), "C:\\Users\\tchan\\Desktop\\log_files\\pointsHackDebug_%d.txt", s_n_debug);
	sortPoints(pointsHackDebug);
	write_points(fileName, pointsHackDebug);

	snprintf(fileName, sizeof(fileName), "C:\\Users\\tchan\\Desktop\\log_files\\pointsHackDebugOrig_%d.txt", s_n_debug);
	write_orig(fileName, pointsHackDebugOrig, "Hack");

	write_ranges("C:\\Users\\tchan\\Desktop\\log_files\\pointsSWRange.txt", pointsSWRange);

	// Sort BEFORE writing so the SW dump is ordered like the Hack dump and the two files can be diffed.
	snprintf(fileName, sizeof(fileName), "C:\\Users\\tchan\\Desktop\\log_files\\pointsSWDebug_%d.txt", s_n_debug);
	sortPoints(pointsSWDebug);
	write_points(fileName, pointsSWDebug);

	snprintf(fileName, sizeof(fileName), "C:\\Users\\tchan\\Desktop\\log_files\\pointsSWDebugOrig_%d.txt", s_n_debug);
	write_orig(fileName, pointsSWDebugOrig, "SW");

	// Compare the ranges of every key present in both maps and report mismatches.
	for (const auto& [key, hack_range] : pointsHackRange)
	{
		const auto sw_it = pointsSWRange.find(key);
		if (sw_it == pointsSWRange.end())
			continue;

		const auto& [x1, y1, u1, v1] = hack_range;
		const auto& [x2, y2, u2, v2] = sw_it->second;
		if (x1 != x2 || y1 != y2 || u1 != u2 || v1 != v2)
		{
			// Throwing from an exit handler would terminate the process, so just log the mismatch.
			fprintf(stderr, "Wrong range: %d; %d %d %d %d; %d %d %d %d;\n", key, x1, y1, u1, v1, x2, y2, u2, v2);
		}
	}
}
#endif
// Builds the edge function for the directed line a -> b. The three returned coefficients
// (x coefficient, y coefficient, constant) give a value of zero at both a and b.
__forceinline GSState::EdgeFunction GSState::GetEdgeFunction(const Point& a, const Point& b)
{
	const auto coef_x = a.y - b.y;
	const auto coef_y = b.x - a.x;
	const auto constant = a.x * b.y - a.y * b.x;
	return {coef_x, coef_y, constant};
}
// Tests an edge-function value against the fill rule for the given edge type.
//   e        : value of the edge function at the sample point.
//   edgeType : classification of the edge the function belongs to.
// Top and left edges are inclusive (e == 0 counts as inside); right and bottom edges are
// exclusive (e must be strictly positive).
// Returns false for an unrecognised edge type. pxFail reports it, but the function must
// still return a value on every path in case pxFail returns.
__forceinline bool GSState::CheckEdgeFunction(double e, EdgeType edgeType)
{
	if (edgeType == EdgeType::TOP || edgeType == EdgeType::LEFT)
		return e >= 0;

	if (edgeType == EdgeType::RIGHT || edgeType == EdgeType::BOTTOM)
		return e > 0;

	pxFail("Invalid edge type");
	return false;
}
// Interpolates the U and V attributes of the three vertices using the edge-function values as
// weights: p0 is weighted by e12, p1 by e20 and p2 by e01, normalised by their sum.
// Returns {U, V}.
__forceinline std::tuple<double, double> GSState::InterpolateEdgeFunctionsUV(
	double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2)
{
	const double weight_sum = e01 + e12 + e20;
	const double weighted_u = e12 * p0.U + e20 * p1.U + e01 * p2.U;
	const double weighted_v = e12 * p0.V + e20 * p1.V + e01 * p2.V;
	return {weighted_u / weight_sum, weighted_v / weight_sum};
}
// Interpolates the S, T and Q attributes of the three vertices using the edge-function values
// as weights: p0 is weighted by e12, p1 by e20 and p2 by e01, normalised by their sum.
// Returns {S, T, Q}; the caller performs the division by Q.
__forceinline std::tuple<double, double, double> GSState::InterpolateEdgeFunctionsSTQ(
	double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2)
{
	const double weight_sum = e01 + e12 + e20;
	const double s_interp = (e12 * p0.S + e20 * p1.S + e01 * p2.S) / weight_sum;
	const double t_interp = (e12 * p0.T + e20 * p1.T + e01 * p2.T) / weight_sum;
	const double q_interp = (e12 * p0.Q + e20 * p1.Q + e01 * p2.Q) / weight_sum;
	return {s_interp, t_interp, q_interp};
}
// Decides whether (x, y) is still within the bounding box [minX..maxX] x [minY..maxY] along the
// axis being walked:
//   dxBigger : only the x range is tested.
//   dyBigger : only the y range is tested (when dxBigger is false).
//   neither  : the point passes if it lies within either range.
__forceinline bool GSState::CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger)
{
	const bool x_in_range = minX <= x && x <= maxX;
	if (dxBigger)
		return x_in_range;

	const bool y_in_range = minY <= y && y <= maxY;
	if (dyBigger)
		return y_in_range;

	return x_in_range || y_in_range;
}
// Normalises the vertex order of a triangle:
//  1. If the cross product (p1 - p0) x (p2 - p0) is negative, p1 and p2 are swapped so every
//     triangle has the same winding (interior on the right of each directed edge).
//  2. The vertices are then rotated (winding preserved) so that the one with the smallest y,
//     ties broken by the smallest x, comes first.
// Note that the result is a rotation, not a full sort, of the three points.
__forceinline std::tuple<GSState::Point, GSState::Point, GSState::Point> GSState::SortPoints(Point p0, Point p1, Point p2)
{
	const auto winding = (p1.x - p0.x) * (p2.y - p0.y) - (p1.y - p0.y) * (p2.x - p0.x);
	if (winding < 0)
		std::swap(p1, p2);

	const Point ring[3] = {p0, p1, p2};

	// True when lhs comes before rhs in (y, x) order.
	const auto precedes = [](const Point& lhs, const Point& rhs) {
		return lhs.y < rhs.y || (lhs.y == rhs.y && lhs.x < rhs.x);
	};

	int first = 0;
	if (precedes(ring[1], ring[first]))
		first = 1;
	if (precedes(ring[2], ring[first]))
		first = 2;

	return {ring[first], ring[(first + 1) % 3], ring[(first + 2) % 3]};
}
// Reorders the vertices with SortPoints and labels each edge for the fill rule:
//   p0-p1 : TOP when horizontal (equal y), otherwise RIGHT.
//   p1-p2 : RIGHT when it goes down (p1.y < p2.y), LEFT when it goes up, BOTTOM when horizontal.
//   p2-p0 : always LEFT.
// Returns {p0, p1, p2, type of p0-p1, type of p1-p2, type of p2-p0}.
__forceinline std::tuple<GSState::Point, GSState::Point, GSState::Point, GSState::EdgeType, GSState::EdgeType, GSState::EdgeType>
GSState::SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2)
{
	std::tie(p0, p1, p2) = SortPoints(p0, p1, p2);

	const EdgeType first_edge = (p0.y == p1.y) ? EdgeType::TOP : EdgeType::RIGHT;

	EdgeType second_edge = EdgeType::BOTTOM;
	if (p1.y < p2.y)
		second_edge = EdgeType::RIGHT;
	else if (p1.y > p2.y)
		second_edge = EdgeType::LEFT;

	return {p0, p1, p2, first_edge, second_edge, EdgeType::LEFT};
}
// Returns true when pixel (x, y) lies inside the scissor rectangle; all four bounds are inclusive.
__forceinline bool GSState::CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1)
{
	if (x < SCAX0 || x > SCAX1)
		return false;

	return SCAY0 <= y && y <= SCAY1;
}
// Applies the texture wrap/clamp modes in `clamp` to a single integer texel coordinate pair.
//   U, V : texel coordinates; first limited to [-2047, 2047].
//   W, H : texture width and height (powers of two, used as masks in REPEAT mode).
// Returns the wrapped/clamped {U, V}.
// TODO: We can remove this function. Not used.
std::tuple<int, int> GSState::CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp)
{
	// Initial clamping done on all UVs
	U = std::clamp(U, -2047, 2047);
	V = std::clamp(V, -2047, 2047);

	// Horizontal axis. In REGION_REPEAT mode MINU acts as the mask and MAXU as the fixed bits.
	const int u_lo = static_cast<int>(clamp.MINU);
	const int u_hi = static_cast<int>(clamp.MAXU);
	switch (clamp.WMS)
	{
		case CLAMP_REPEAT:
			U &= W - 1; // W is a power of 2
			break;
		case CLAMP_CLAMP:
			U = std::max(0, std::min(W - 1, U));
			break;
		case CLAMP_REGION_CLAMP:
			U = std::max(u_lo, std::min(u_hi, U));
			break;
		case CLAMP_REGION_REPEAT:
			U = (U & u_lo) | u_hi;
			break;
	}

	// Vertical axis. In REGION_REPEAT mode MINV acts as the mask and MAXV as the fixed bits.
	const int v_lo = static_cast<int>(clamp.MINV);
	const int v_hi = static_cast<int>(clamp.MAXV);
	switch (clamp.WMT)
	{
		case CLAMP_REPEAT:
			V &= H - 1; // H is a power of 2
			break;
		case CLAMP_CLAMP:
			V = std::max(0, std::min(H - 1, V));
			break;
		case CLAMP_REGION_CLAMP:
			V = std::max(v_lo, std::min(v_hi, V));
			break;
		case CLAMP_REGION_REPEAT:
			V = (V & v_lo) | v_hi;
			break;
		default:
			ASSUME(0);
	}

	return {U, V};
}
#if MY_DEBUG == 1
// Debug only: copy of the switchOrient argument of the most recent EdgeWalkTriangleMinMaxUVImpl call.
// CalculateUV reads it to negate the logged y coordinate, since that walker negates y when switchOrient is set.
bool g_switchOrient = false;
#endif
// Interpolates the texture coordinate at one sample point and widens the running
// [minU..maxU] x [minV..maxV] texel range to include the texels it touches.
//   e01, e12, e20 : edge-function values at the sample point (interpolation weights).
//   p0, p1, p2    : triangle vertices carrying U/V (FST) or S/T/Q (non-FST).
//   W, H          : texture size, used to scale S/Q and T/Q into texels when FST is false.
//   bilinear      : when true the footprint spans floor(c - 0.5) .. floor(c + 0.5) on each axis,
//                   otherwise just floor(c).
//   t0, t1, t2, clamp : not used by this function.
void GSState::CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2,
	const Point& p0, const Point& p1, const Point& p2,
	int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV)
{
	double U, V;
	if (FST)
	{
		std::tie(U, V) = InterpolateEdgeFunctionsUV(e01, e12, e20, p0, p1, p2);
	}
	else
	{
		auto [S, T, Q] = InterpolateEdgeFunctionsSTQ(e01, e12, e20, p0, p1, p2);
		U = W * (S / Q);
		V = H * (T / Q);
	}

	// Clamp to valid UV range
	U = std::max(-2047.0, std::min(2047.0, U));
	V = std::max(-2047.0, std::min(2047.0, V));

	// Lowest and highest texel touched on each axis.
	int u_lo, u_hi, v_lo, v_hi;
	if (bilinear)
	{
		u_lo = static_cast<int>(std::floor(U - 0.5));
		u_hi = static_cast<int>(std::floor(U + 0.5));
		v_lo = static_cast<int>(std::floor(V - 0.5));
		v_hi = static_cast<int>(std::floor(V + 0.5));
	}
	else
	{
		u_lo = u_hi = static_cast<int>(std::floor(U));
		v_lo = v_hi = static_cast<int>(std::floor(V));
	}

#if MY_DEBUG == 1
	if (GSState::s_n == s_n_debug)
	{
		// Recover the pixel position from the same weights and log every texel touched there.
		int x = (int)((e01 * p2.x + e12 * p0.x + e20 * p1.x) / (e01 + e12 + e20));
		int y = (int)((e01 * p2.y + e12 * p0.y + e20 * p1.y) / (e01 + e12 + e20));
		if (g_switchOrient)
			y = -y;
		if (bilinear)
		{
			pointsHackDebug.push_back({primID, x, y, u_lo, v_lo});
			pointsHackDebug.push_back({primID, x, y, u_lo, v_hi});
			pointsHackDebug.push_back({primID, x, y, u_hi, v_lo});
			pointsHackDebug.push_back({primID, x, y, u_hi, v_hi});
		}
		else
		{
			pointsHackDebug.push_back({primID, x, y, u_lo, v_lo});
		}
	}
#endif

	minU = std::min(u_lo, minU);
	minV = std::min(v_lo, minV);
	maxU = std::max(u_hi, maxU);
	maxV = std::max(v_hi, maxV);
}
// Evaluates the triangle at the four corners of the scissor rectangle and, for every corner
// that passes all three edge tests, folds its UV into the running min/max.
// This captures the extreme UVs of a triangle that is cut off by the scissor.
void GSState::CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
	const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV)
{
	const int corner_x[2] = {static_cast<int>(scissor.SCAX0), static_cast<int>(scissor.SCAX1)};
	const int corner_y[2] = {static_cast<int>(scissor.SCAY0), static_cast<int>(scissor.SCAY1)};

	// Get edge function coefficients
	const EdgeFunction E01 = GetEdgeFunction(p0, p1);
	const EdgeFunction E12 = GetEdgeFunction(p1, p2);
	const EdgeFunction E20 = GetEdgeFunction(p2, p0);

	for (int xi = 0; xi < 2; xi++)
	{
		const int x = corner_x[xi];
		for (int yi = 0; yi < 2; yi++)
		{
			const int y = corner_y[yi];

			const double e01 = E01.a * x + E01.b * y + E01.c;
			const double e12 = E12.a * x + E12.b * y + E12.c;
			const double e20 = E20.a * x + E20.b * y + E20.c;

			const bool covered = CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20);
			if (!covered)
				continue;

			CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, minV, maxU, maxV);
		}
	}
}
// Walks the integer pixel positions that hug the directed edge p0 -> p1 on its interior side and,
// for every position that is inside the triangle and the scissor, folds the interpolated UV into
// the running min/max via CalculateUV.
//   p0, p1, p2      : triangle vertices; the walked edge is p0 -> p1.
//   t01, t12, t20   : fill-rule classification of edges p0-p1, p1-p2 and p2-p0.
//   switchOrient    : walk the edge in the opposite direction (see the comment in the body).
//   minU..maxV      : running texel range, updated in place.
// A zero-length edge is skipped: it has no direction to walk, and the stepping logic below
// would never make progress on it.
void GSState::EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2,
	EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
	const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV)
{
	const int SCAX0 = static_cast<int>(scissor.SCAX0);
	int SCAY0 = static_cast<int>(scissor.SCAY0);
	const int SCAX1 = static_cast<int>(scissor.SCAX1);
	int SCAY1 = static_cast<int>(scissor.SCAY1);

#if MY_DEBUG == 1
	g_switchOrient = switchOrient;
#endif

	// To traverse in the opposite direction we swap the first and second points
	// and flip the whole triangle vertically (this is so that right-hand interior rule can still be followed).
	// This only affects XY so doesn't matter for computing UV ranges.
	if (switchOrient)
	{
		std::swap(p0, p1);
		std::swap(t12, t20);
		p0.y = -p0.y;
		p1.y = -p1.y;
		p2.y = -p2.y;
		std::tie(SCAY0, SCAY1) = std::tuple(-SCAY1, -SCAY0);
	}

	// Get edge function coefficients
	const EdgeFunction E01 = GetEdgeFunction(p0, p1);
	const EdgeFunction E12 = GetEdgeFunction(p1, p2);
	const EdgeFunction E20 = GetEdgeFunction(p2, p0);

	// Initialize deltas and steps
	const double dx = p1.x - p0.x;
	const double dy = p1.y - p0.y;

	// Degenerate edge (p0 == p1): its edge function is identically zero, so neither step
	// direction is ever preferred and the walk loop could not terminate. Nothing to walk.
	if (dx == 0.0 && dy == 0.0)
		return;

	int sx = (dx > 0) ? 1 : -1;
	int sy = (dy > 0) ? 1 : -1;

	// Candidate starting coordinates: the pixel-grid neighbours of p0.
	const double floor_x = std::floor(p0.x);
	const double ceil_x = std::ceil(p0.x);
	const double floor_y = std::floor(p0.y);
	const double ceil_y = std::ceil(p0.y);

	int x = 0;
	int y = 0;

	// Accepts (px, py) as the starting point if it is on the interior side of edge p0-p1.
	const auto try_start = [&](double px, double py) {
		if (E01.a * px + E01.b * py + E01.c >= 0.0)
		{
			x = static_cast<int>(px);
			y = static_cast<int>(py);
			return true;
		}
		return false;
	};

	// Initialize starting point by rounding correctly
	// Use right-hand-interior rule and brute force case analysis to get the correct starting point
	// FIXME: THIS IS WRONG!!! THERE CAN REALLY BE 3 CANDIDATE POINTS
	bool have_start = true;
	if (dy == 0.0)
	{
		// Horizontal edge: start on the interior side and step towards it vertically.
		if (dx > 0.0)
		{
			x = static_cast<int>(floor_x);
			y = static_cast<int>(ceil_y);
			sy = 1;
		}
		else
		{
			x = static_cast<int>(ceil_x);
			y = static_cast<int>(floor_y);
			sy = -1;
		}
	}
	else if (dx == 0.0)
	{
		// Vertical edge: start on the interior side and step towards it horizontally.
		if (dy > 0.0)
		{
			y = static_cast<int>(floor_y);
			x = static_cast<int>(floor_x);
			sx = -1;
		}
		else
		{
			y = static_cast<int>(ceil_y);
			x = static_cast<int>(ceil_x);
			sx = 1;
		}
	}
	else if ((dx > 0.0) && (dy > 0.0))
	{
		have_start = try_start(ceil_x, ceil_y) || try_start(floor_x, ceil_y);
	}
	else if ((dx < 0.0) && (dy > 0.0))
	{
		have_start = try_start(floor_x, ceil_y) || try_start(floor_x, floor_y);
	}
	else if ((dx < 0.0) && (dy < 0.0))
	{
		have_start = try_start(floor_x, floor_y) || try_start(ceil_x, floor_y);
	}
	else if ((dx > 0.0) && (dy < 0.0))
	{
		have_start = try_start(ceil_x, floor_y) || try_start(ceil_x, ceil_y);
	}
	else
	{
		// Only reachable if a delta is not an ordinary number.
		have_start = false;
	}

	if (!have_start)
	{
		// Should be unreachable. Bail out instead of walking from an undefined position
		// in case pxFail returns.
		pxFail("Invalid edge function");
		return;
	}

	// Initialize edge function values
	double e01 = E01.a * x + E01.b * y + E01.c;
	double e12 = E12.a * x + E12.b * y + E12.c;
	double e20 = E20.a * x + E20.b * y + E20.c;

	// Bounding box of the edge
	const int minX = static_cast<int>(std::floor(std::min(p0.x, p1.x)));
	const int minY = static_cast<int>(std::floor(std::min(p0.y, p1.y)));
	const int maxX = static_cast<int>(std::ceil(std::max(p0.x, p1.x)));
	const int maxY = static_cast<int>(std::ceil(std::max(p0.y, p1.y)));

	const bool dxBigger = std::abs(dx) > std::abs(dy);
	const bool dyBigger = std::abs(dy) > std::abs(dx);

	// Move one pixel horizontally / vertically, updating all three edge functions incrementally.
	const auto step_x = [&]() {
		x += sx;
		e01 += E01.a * sx;
		e12 += E12.a * sx;
		e20 += E20.a * sx;
	};
	const auto step_y = [&]() {
		y += sy;
		e01 += E01.b * sy;
		e12 += E12.b * sy;
		e20 += E20.b * sy;
	};

	// Walk along the longest axis
	while (CheckXYBounds(x, y, minX, minY, maxX, maxY, dxBigger, dyBigger)) // Check if the current point is inside the bounds of the line
	{
		// Update min/max UV values if the current point is inside the triangle and XY is inside the scissor
		if (CheckEdgeFunction(e01, t01) && CheckEdgeFunction(e12, t12) && CheckEdgeFunction(e20, t20) && CheckScissor(x, y, SCAX0, SCAY0, SCAX1, SCAY1))
		{
			CalculateUV(e01, e12, e20, t01, t12, t20, p0, p1, p2, W, H, FST, bilinear, clamp, minU, minV, maxU, maxV);

			// FIXME: TEST WITH dx == 0 and dy == 0 optimization for
			// horiz and vert edges!
			if (!WALK_FULL_EDGE)
			// if (!WALK_FULL_EDGE || dx == 0 || dy == 0)
			{
				// Break after the first rasterizable point. If a horizontal or vertical edge, this should be sufficient
				// since we walk along the edge in both directions.
				// Otherwise, this might not always be the "most outside" point but should be much faster.
				break;
			}
		}

		// Test the horizontal and vertical edge functions
		const double e01x = e01 + E01.a * sx;
		const double e01y = e01 + E01.b * sy;

		if (!CheckEdgeFunction(e01x, t01))
		{
			// Cannot go horizontally, so go vertically
			step_y();
		}
		else if (!CheckEdgeFunction(e01y, t01))
		{
			// Cannot go vertically, so go horizontally
			step_x();
		}
		else if (e01x < e01y)
		{
			// Can go both, but horizontal is closer to the line
			step_x();
		}
		else if (e01x > e01y)
		{
			// Can go both, but vertical is closer to the line
			step_y();
		}
		else
		{
			// Should be unreachable. No step was taken, so stop rather than spin forever
			// in case pxFail returns.
			pxFail("Invalid edge function");
			return;
		}
	}
}
// Extends the running texel range [minU..maxU] x [minV..maxV] with the UVs found along the
// boundary of one triangle: the four scissor corners are tested first, then each of the three
// edges is walked in both directions.
//   p0, p1, p2 : triangle vertices in any order.
//   W, H       : texture width and height.
//   FST        : true when the vertices carry U/V, false when they carry S/T/Q.
//   bilinear   : selects the bilinear texel footprint in CalculateUV.
// Zero-area triangles (collinear or coincident vertices) are ignored: under the fill rules in
// CheckEdgeFunction no pixel passes all three edge tests, and the interpolation weights would sum to zero.
void GSState::EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp,
	int& minU, int& minV, int& maxU, int& maxV)
{
	// Twice the signed area; the coordinates are exact multiples of 1/16 so comparing with 0 is exact.
	const auto doubled_area = (p1.x - p0.x) * (p2.y - p0.y) - (p1.y - p0.y) * (p2.x - p0.x);
	if (doubled_area == 0)
		return;

	// Rearrange the points in the correct order and walk along all edges
	auto [v0_, v1_, v2_, t01, t12, t20] = SortAndClassifyTriangleVerts(p0, p1, p2);

	// Check if the corners of the scissor region are inside the triangle
	CheckScissorUV(v0_, v1_, v2_, t01, t12, t20, W, H, FST, bilinear, scissor, clamp, minU, minV, maxU, maxV);

	const Point verts[3] = {v0_, v1_, v2_};
	const EdgeType types[3] = {t01, t12, t20};

	// First pass walks the edges in the clockwise direction (if Y-axis points down),
	// second pass in the anti-clockwise direction.
	for (const bool reversed : {false, true})
	{
		// Each rotation makes a different edge the "p0 -> p1" edge that gets walked.
		for (int r = 0; r < 3; r++)
		{
			const int a = r;
			const int b = (r + 1) % 3;
			const int c = (r + 2) % 3;
			EdgeWalkTriangleMinMaxUVImpl(verts[a], verts[b], verts[c], types[a], types[b], types[c],
				W, H, FST, bilinear, scissor, clamp, reversed, minU, minV, maxU, maxV);
		}
	}
}
// Computes the texel range touched by all triangles in the current vertex/index buffers.
// Must only be called when the index buffer holds complete triangles (a multiple of 3 indices).
//   W, H      : texture width and height.
//   bilinear  : true for bilinear sampling, false for nearest neighbour.
//   minU..maxV: outputs. They are reset here, accumulated over every triangle, and finally passed
//               through GetClampWrapMinMaxUV with the context's wrap modes.
void GSState::GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const
{
	// Get XY offset values to get XY in window coordinates
	const int OFX = static_cast<int>(m_context->XYOFFSET.OFX);
	const int OFY = static_cast<int>(m_context->XYOFFSET.OFY);

	pxAssert((m_index.tail % 3) == 0); // should be a multiple of 3 for triangles

	// Start from an empty range
	minU = std::numeric_limits<int>::max();
	minV = std::numeric_limits<int>::max();
	maxU = std::numeric_limits<int>::min();
	maxV = std::numeric_limits<int>::min();

	const size_t triangle_count = m_index.tail / 3;
	for (size_t tri_i = 0; tri_i < triangle_count; tri_i++)
	{
#if MY_DEBUG == 1
		primID = tri_i;
		if (s_n == 58 && tri_i == 1094)
		{
			printf("\n");
		}
#endif

		Point verts[3];
		for (int vert_i = 0; vert_i < 3; vert_i++)
		{
			const auto& src = m_vertex.buff[m_index.buff[3 * tri_i + vert_i]];

			Point p;
			// Position: subtract the window offset, then convert from 1/16 units
			const int xi = static_cast<int>(src.XYZ.X);
			const int yi = static_cast<int>(src.XYZ.Y);
			p.x = static_cast<double>(xi - OFX) / 16.0;
			p.y = static_cast<double>(yi - OFY) / 16.0;

			if (m_draw_env->PRIM.FST)
			{
				// Texel coordinates are stored in 1/16 units
				p.U = static_cast<double>(src.U) / 16.0;
				p.V = static_cast<double>(src.V) / 16.0;
			}
			else
			{
				// Keep S, T, Q as they are; the division by Q happens after interpolation
				p.S = static_cast<double>(src.ST.S);
				p.T = static_cast<double>(src.ST.T);
				p.Q = static_cast<double>(src.RGBAQ.Q);
			}

			verts[vert_i] = p;
		}

#if MY_DEBUG == 1
		if (s_n == s_n_debug)
		{
			pointsHackDebugOrig[{tri_i, 0}] = {verts[0].x, verts[0].y, verts[0].S, verts[0].T};
			pointsHackDebugOrig[{tri_i, 1}] = {verts[1].x, verts[1].y, verts[1].S, verts[1].T};
			pointsHackDebugOrig[{tri_i, 2}] = {verts[2].x, verts[2].y, verts[2].S, verts[2].T};
		}
#endif

		EdgeWalkTriangleMinMaxUV(verts[0], verts[1], verts[2], W, H, m_draw_env->PRIM.FST, bilinear, m_context->SCISSOR, m_context->CLAMP, minU, minV, maxU, maxV);
	}

	// Map the accumulated range through the wrap/clamp mode of each axis
	GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU);
	GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV);
}
// Get the minimum and maximum UV coordinates of all sprites in the current vertex buffer
// This should only be called if all vertices in the buffer are complete sprites
// W and H are the texture width and height
// minU, minV, maxU, maxV are the current min/max UV values and the output
// bilinear is whether or not to use bilinear interpolation (if false, nearest neighbor is used)
void GSState::GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const
{
// Get XY offset values to get XY in window coordinates
int OFX = static_cast<int>(m_context->XYOFFSET.OFX);
int OFY = static_cast<int>(m_context->XYOFFSET.OFY);
pxAssert((m_index.tail % 2) == 0); // should be a multiple of 2 for sprites
// Initialize min/max UV values
minU = std::numeric_limits<int>::max();
minV = std::numeric_limits<int>::max();
maxU = std::numeric_limits<int>::min();
maxV = std::numeric_limits<int>::min();
// Iterate through each sprite and get min/max UVs
for (size_t sprite_i = 0; sprite_i < m_index.tail / 2; sprite_i++)
{
Point verts[2];
for (int vert_i = 0; vert_i < 2; vert_i++)
{
int xi = static_cast<int>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].XYZ.X);
int yi = static_cast<int>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].XYZ.Y);
Point p;
p.x = static_cast<double>(xi - OFX) / 16.0;
p.y = static_cast<double>(yi - OFY) / 16.0;
if (m_draw_env->PRIM.FST)
{
p.U = static_cast<double>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].U) / 16.0;
p.V = static_cast<double>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].V) / 16.0;
}
else
{
p.U = static_cast<double>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].ST.S) * W;
p.T = static_cast<double>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].ST.T) * H;
p.Q = static_cast<double>(m_vertex.buff[m_index.buff[2 * sprite_i + vert_i]].RGBAQ.Q);
}
verts[vert_i] = p;
}
if (!m_draw_env->PRIM.FST)
{
// For sprites, always use the second Q value
verts[0].S = verts[0].S / verts[1].Q;
verts[0].T = verts[0].T / verts[1].Q;
verts[1].S = verts[1].S / verts[1].Q;
verts[1].T = verts[1].T / verts[1].Q;
}
double x0 = std::min(verts[0].x, verts[1].x);
double y0 = std::min(verts[0].y, verts[1].y);
double x1 = std::max(verts[0].x, verts[1].x);
double y1 = std::max(verts[0].y, verts[1].y);
double U0 = verts[0].x == x0 ? verts[0].U : verts[1].U;
double V0 = verts[0].y == y0 ? verts[0].V : verts[1].V;
double U1 = verts[0].x == x1 ? verts[0].U : verts[1].U;
double V1 = verts[0].y == y1 ? verts[0].V : verts[1].V;
int xVals[2] = { static_cast<int>(std::ceil(x0)), static_cast<int>(std::floor(x1)) };
int yVals[2] = { static_cast<int>(std::ceil(y0)), static_cast<int>(std::floor(y1)) };
if (std::floor(x1) == x1) // omit right edges
xVals[1]--;
if (std::floor(y1) == y1) // omit bottom edges
yVals[1]--;
// scissoring
int SCAX0 = static_cast<int>(m_context->SCISSOR.SCAX0);
int SCAY0 = static_cast<int>(m_context->SCISSOR.SCAY0);
int SCAX1 = static_cast<int>(m_context->SCISSOR.SCAX1);
int SCAY1 = static_cast<int>(m_context->SCISSOR.SCAY1);
for (int i = 0; i < 2; i++)
xVals[i] = std::max(SCAX0, std::min(xVals[i], SCAX1));
for (int i = 0; i < 2; i++)
yVals[i] = std::max(SCAY0, std::min(yVals[i], SCAY1));
if (xVals[0] <= xVals[1] && yVals[0] <= yVals[1])
{
for (double x : xVals)
{
const double U = ((x1 - x) * U0 + (x - x0) * U1) / (x1 - x0);
if (bilinear)
{
minU = std::min(static_cast<int>(std::floor(U - 0.5)), minU);
maxU = std::max(static_cast<int>(std::floor(U + 0.5)), maxU);
}
else
{
minU = std::min(static_cast<int>(std::floor(U)), minU);
maxU = std::max(static_cast<int>(std::floor(U)), maxU);
}
}
for (double y : yVals)
{
const double V = ((y1 - y) * V0 + (y - y0) * V1) / (y1 - y0);
if (bilinear)
{
minV = std::min(static_cast<int>(std::floor(V - 0.5)), minV);
maxV = std::max(static_cast<int>(std::floor(V + 0.5)), maxV);
}
else
{
minV = std::min(static_cast<int>(std::floor(V)), minV);
maxV = std::max(static_cast<int>(std::floor(V)), maxV);
}
}
}
}
GSState::GetClampWrapMinMaxUV(W, m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, minU, maxU, minU, maxU);
GSState::GetClampWrapMinMaxUV(H, m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, minV, maxV, minV, maxV);
}
static __fi bool IsAutoFlushEnabled()
{
return GSIsHardwareRenderer() ? (GSConfig.UserHacks_AutoFlush != GSHWAutoFlushLevel::Disabled) : GSConfig.AutoFlushSW;
@ -1675,6 +2439,7 @@ void GSState::FlushPrim()
}
#endif
// TODO: Put the accurate UV calculation here?
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// Texel coordinate rounding
@ -3716,16 +4481,117 @@ __forceinline void GSState::VertexKick(u32 skip)
Flush(VERTEXCOUNT);
}
// FIXME: Replace old UsesRepeatRange to this.
// Maps the range [ min .. max ] under the region repeat function determined by MSK and FIX.
// The region repeat function is f(x) = (x & MSK) | FIX.
//   MSK, FIX         : mask and fixed bits of the region repeat function.
//   min, max         : inclusive input range (taken by value and adjusted locally below).
//   min_out, max_out : receive the lower and upper bound computed for f over the range.
// Return true if f(x) != x for at least one x in [ min .. max ].
bool GSState::GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out)
{
// If we cross from -1 to 0 combine the negative and positive parts separately
// as the below algorithm only works if min <= max as unsigned integers.
if (min < 0 && 0 <= max)
{
int min_out_1, max_out_1, min_out_2, max_out_2;
// Negative half [min .. -1] and non-negative half [0 .. max], each handled by a recursive call.
const bool modified_1 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, -1, min_out_1, max_out_1);
const bool modified_2 = GSState::GetRegionRepeatMinMaxUV(MSK, FIX, 0, max, min_out_2, max_out_2);
min_out = std::min(min_out_1, min_out_2);
max_out = std::max(max_out_1, max_out_2);
return modified_1 || modified_2;
}
const int cleared_bits = ~MSK & ~FIX; // Bits that are always cleared by applying msk and fix
const int set_bits = FIX; // Bits that are always set by applying msk and fix
unsigned long msb;
// Bits in which min and max differ...
int variable_bits = min ^ max;
// ...extended to every bit below the highest differing one.
// NOTE(review): relies on _BitScanReverse storing the index of the highest set bit in msb and
// returning non-zero only when its input is non-zero - confirm against the intrinsic's documentation.
if (_BitScanReverse(&msb, variable_bits))
variable_bits |= (1 << msb) - 1; // Fill in all lower bits
const int always_set = min & ~variable_bits; // Bits that are set in every value in min...max
const int sometimes_set = min | variable_bits; // Bits that are set in at least one value in min...max
const bool sets_bits = (set_bits | always_set) != always_set; // At least one bit in min...max is set by applying msk and fix
const bool clears_bits = (cleared_bits & sometimes_set) != 0; // At least one bit in min...max is cleared by applying msk and fix
// Variable bits whose value is overridden (forced to 0 or to 1) by the region repeat function
const int overwritten_variable_bits = (cleared_bits | set_bits) & variable_bits;
// A variable bit that's `0` in `min` will at some point switch to a `1` (because it's variable)
// When it does, all bits below it will switch to a `0` (that's how incrementing works)
// If the 0 to 1 switch is reflected in the final output (not masked and not replaced by a fixed value),
// the final value would be larger than the previous. Otherwise, the final value will be less.
// The true minimum value is `min` with all bits below the most significant replaced variable `0` bit cleared
const int min_overwritten_variable_zeros = ~min & overwritten_variable_bits;
if (_BitScanReverse(&msb, min_overwritten_variable_zeros))
min &= (~0u << msb);
// Similar thing for max, but the first masked `1` bit
const int max_overwritten_variable_ones = max & overwritten_variable_bits;
if (_BitScanReverse(&msb, max_overwritten_variable_ones))
max |= (1 << msb) - 1;
// Apply the region repeat function to the adjusted end points
min_out = (MSK & min) | FIX;
max_out = (MSK & max) | FIX;
return sets_bits || clears_bits;
}
// Get the min/max texel coordinate (U or V) assuming it takes the values min .. max and is then
// wrapped/clamped according to the mode WM.
// SIZE: Width/height of texture (power of 2)
// MIN/MAX: Either the clamping range (in REGION_CLAMP mode) or the MSK/FIX parameters (in REGION_REPEAT mode)
// min/max: Range of the unwrapped coordinate (callers are expected to pass min <= max)
// min_out/max_out: Receive the range of the coordinate after the wrap/clamp mode has been applied.
//                  Left untouched when WM is not a valid mode.
// Returns true if any of the values are changed. I.e., if f(x) is the mapping function for clamp/wrap mode,
// return true if f(x) != x for some x in [ min .. max ]
bool GSState::GetClampWrapMinMaxUV(int SIZE, int WM, int MIN, int MAX, int min, int max, int& min_out, int& max_out)
{
	if (WM == CLAMP_REPEAT)
	{
		const int wrap_mask = SIZE - 1;

		// If we cross the SIZE boundary then we always get the largest/smallest possible wrapped value
		if ((min & ~wrap_mask) != (max & ~wrap_mask))
		{
			min_out = 0;
			max_out = wrap_mask;
		}
		else
		{
			min_out = min & wrap_mask;
			max_out = max & wrap_mask;
		}

		// Wrapping is the identity exactly on [ 0 .. SIZE - 1 ], so a value is changed
		// only when part of the range lies outside that interval.
		return min < 0 || max > wrap_mask;
	}
	else if (WM == CLAMP_CLAMP)
	{
		min_out = std::max(0, std::min(SIZE - 1, min));
		max_out = std::max(0, std::min(SIZE - 1, max));

		// Clamping only changes values that fall outside [ 0 .. SIZE - 1 ].
		return min < 0 || max > SIZE - 1;
	}
	else if (WM == CLAMP_REGION_CLAMP)
	{
		min_out = std::max(MIN, std::min(MAX, min));
		max_out = std::max(MIN, std::min(MAX, max));

		// Region clamping only changes values that fall outside [ MIN .. MAX ].
		return min < MIN || max > MAX;
	}
	else if (WM == CLAMP_REGION_REPEAT)
	{
		// In this mode the MIN/MAX arguments carry the MSK/FIX pair.
		const int MSK = MIN;
		const int FIX = MAX;
		return GSState::GetRegionRepeatMinMaxUV(MSK, FIX, min, max, min_out, max_out);
	}
	else
	{
		pxAssertMsg(false, "Invalid clamp/wrap mode");
		return false;
	}
}
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
/// Also calculates the real min and max values seen after applying the region repeat to all values in min...max
static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
/// FIXME: CHANGE MEMBER NAMES AND MAKE SURE CALLS HAVE ARGS IN THE RIGHT ORDER!!!
bool GSState::UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
{
if ((min < 0) != (max < 0))
{
// Algorithm doesn't work properly if bits overflow when incrementing (happens on the -1 → 0 crossing)
// Conveniently, crossing zero guarantees you use the full range
*min_out = fix;
*max_out = (fix | msk) + 1;
*max_out = fix | msk;
return true;
}
@ -3757,7 +4623,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i
max |= (1 << msb) - 1;
*min_out = (msk & min) | fix;
*max_out = ((msk & max) | fix) + 1;
*max_out = (msk & max) | fix;
return sets_bits || clears_bits;
}

View File

@ -22,6 +22,9 @@ public:
virtual ~GSState();
static constexpr int GetSaveStateSize();
static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out);
static bool GetRegionRepeatMinMaxUV(int MSK, int FIX, int min, int max, int& min_out, int& max_out);
static bool GetClampWrapMinMaxUV(int SIZE, int WM, int MSK, int FIX, int min, int max, int& min_out, int& max_out);
private:
// RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there)
@ -361,6 +364,76 @@ public:
void CalculateDisplayOffset(bool scanmask);
} PCRTCDisplays;
public:
// A vertex as consumed by the UV min/max helpers: a position plus texture coordinates.
// Each anonymous union gives one double two names, so the first texture coordinate can be
// addressed as U or S and the second as T or V.
struct Point
{
	double x;
	double y;
	union
	{
		double U;
		double S;
	};
	union
	{
		double T;
		double V;
	};
	double Q;

	// The position defaults to the origin; both texture coordinates and Q default to NaN.
	Point(double x = 0, double y = 0, double US = NAN, double VT = NAN, double Q = NAN)
		: x{x}, y{y}, U{US}, V{VT}, Q{Q}
	{
	}
};
// Three coefficients describing one edge function.
// NOTE(review): presumably evaluated as a * x + b * y + c -- confirm against GetEdgeFunction.
struct EdgeFunction
{
	double a;
	double b;
	double c;

	// Stores the three coefficients as given; there is no default constructor.
	EdgeFunction(double a, double b, double c)
		: a{a}, b{b}, c{c}
	{
	}
};
// Classification attached to a triangle edge.
// The numeric values are spelled out and match the implicit numbering of the enumerators.
enum class EdgeType
{
	TOP = 0,
	RIGHT = 1,
	LEFT = 2,
	BOTTOM = 3,
};
public:
// Helpers for the UV min/max calculation based on triangle edge rasterization.
// Only the declarations are visible here, so the notes below are limited to what the
// signatures show; anything else is marked as an assumption to confirm.

// Builds an EdgeFunction from two points.
static EdgeFunction GetEdgeFunction(const Point& a, const Point& b);
// NOTE(review): presumably decides whether an edge function value `e` counts as inside for an
// edge of the given type -- confirm against the definition.
static bool CheckEdgeFunction(double e, EdgeType edgeType);
// NOTE(review): e01/e12/e20 look like the edge function values for the edges p0-p1, p1-p2 and
// p2-p0 -- confirm. Returns two doubles (UV variant) or three doubles (STQ variant).
static std::tuple<double, double> InterpolateEdgeFunctionsUV(
double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2);
static std::tuple<double, double, double> InterpolateEdgeFunctionsSTQ(
double e01, double e12, double e20, const Point& p0, const Point& p1, const Point& p2);
static bool CheckXYBounds(double x, double y, int minX, int minY, int maxX, int maxY, bool dxBigger, bool dyBigger);
// Takes the three points by value and returns three points.
static std::tuple<Point, Point, Point> SortPoints(Point p0, Point p1, Point p2);
static __forceinline bool CheckScissor(int x, int y, int SCAX0, int SCAY0, int SCAX1, int SCAY1);
// Returns three points together with three EdgeType values.
static std::tuple<Point, Point, Point, EdgeType, EdgeType, EdgeType> SortAndClassifyTriangleVerts(Point p0, Point p1, Point p2);
static std::tuple<int, int> CalculateUVHelper(int U, int V, int W, int H, const GIFRegCLAMP& clamp);
// The functions below return void and report through the minU/minV/maxU/maxV reference parameters.
// NOTE(review): presumably they only widen the range already stored in those parameters (the
// software renderer call site seeds them with INT_MAX / INT_MIN before calling) -- confirm.
static void CalculateUV(double e01, double e12, double e20, EdgeType t0, EdgeType t1, EdgeType t2,
const Point& p0, const Point& p1, const Point& p2,
int W, int H, bool FST, bool bilinear, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV);
static void CheckScissorUV(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, int& minU, int& minV, int& maxU, int& maxV);
static void EdgeWalkTriangleMinMaxUVImpl(Point p0, Point p1, Point p2, EdgeType t01, EdgeType t12, EdgeType t20, int W, int H, bool FST, bool bilinear,
const GIFRegSCISSOR& scissor, const GIFRegCLAMP& clamp, bool switchOrient, int& minU, int& minV, int& maxU, int& maxV);
static void EdgeWalkTriangleMinMaxUV(Point p0, Point p1, Point p2, int W, int H, bool FST, bool bilinear, GIFRegSCISSOR scissor, GIFRegCLAMP clamp,
int& minU, int& minV, int& maxU, int& maxV);
// Non-static, const entry points for triangle and sprite draws.
// The software renderer passes 1 << TEX0.TW and 1 << TEX0.TH as W and H and adds 1 to
// maxU/maxV when it builds the coverage rectangle, i.e. it treats the maxima as inclusive.
void GetTriangleMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const;
void GetSpriteMinMaxUV(int W, int H, bool bilinear, int& minU, int& minV, int& maxU, int& maxV) const;
public:
/// Returns the appropriate directory for draw dumping.
static std::string GetDrawDumpPath(const char* format, ...);

View File

@ -183,6 +183,14 @@ void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u
for (; i < (count - 1); i += 2) // 2x loop unroll
{
processVertices(v[index[i + 0]], v[index[i + 1]], true);
if (GSState::s_n == 58)
{
fprintf(stderr, "%d: %f %f %f %f\n", i, tmin.x, tmin.y, tmax.x, tmax.y);
}
if (GSState::s_n == 58 && i == 164)
{
fprintf(stderr, "hi\n");
}
}
if (count & 1)
{

View File

@ -10,8 +10,21 @@
#include <fstream>
#include "debug.h"
#if MY_DEBUG == 1
#include <algorithm>
extern bool savePoints;
extern std::map<int, std::tuple<int, int, int, int>> pointsHackRange;
extern std::map<int, std::tuple<int, int, int, int>> pointsSWRange;
extern std::vector<std::tuple<int, int, int, int, int>> pointsHackDebug;
extern std::vector<std::tuple<int, int, int, int, int>> pointsSWDebug;
extern int s_n_debug;
extern int* primIDSW;
#else
// Comment to disable all dynamic code generation.
#define ENABLE_JIT_RASTERIZER
#endif
#if MULTI_ISA_COMPILE_ONCE
// Lack of a better home
@ -627,8 +640,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
#if MY_DEBUG == 1
int x = left - 4;
#endif
while (1)
{
#if MY_DEBUG == 1
x += 4;
#endif
if (global.s_n == 58 && top == 188 && (*primIDSW - 1 == 1094))
{
printf("");
}
do
{
int fa = 0, za = 0;
@ -712,6 +735,33 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
case ZTST_GREATER: test |= zso <= zdo; break;
}
#if MY_DEBUG == 1
if (savePoints)
{
for (int i = 0; i < 4; i++)
{
if (!pointsSWRange.contains(global.s_n))
{
pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000};
}
if (global.s_n == s_n_debug)
{
if (sel.ltf)
{
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
}
else
{
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, -1, -1});
}
}
}
}
#endif
if (test.alltrue())
continue;
}
@ -1119,6 +1169,48 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
uv1 = clamp.blend8(repeat, VectorI::broadcast128(global.t.mask));
}
#if MY_DEBUG == 1
if (savePoints)
{
for (int i = 0; i < 4; i++)
{
if (test.U32[i] == 0 || sel.notest)
{
if (!pointsSWRange.contains(global.s_n))
{
pointsSWRange[global.s_n] = {10000, 10000, -10000, -10000};
}
std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv0.U16[i]);
std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]);
std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv0.U16[i]);
std::get<3>(pointsSWRange[global.s_n]) = std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv0.U16[i + 4]);
if (sel.ltf)
{
std::get<0>(pointsSWRange[global.s_n]) = std::min(std::get<0>(pointsSWRange[global.s_n]), (int)uv1.U16[i]);
std::get<1>(pointsSWRange[global.s_n]) = std::min(std::get<1>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]);
std::get<2>(pointsSWRange[global.s_n]) = std::max(std::get<2>(pointsSWRange[global.s_n]), (int)uv1.U16[i]);
std::get<3>(pointsSWRange[global.s_n]) = std::max(std::get<3>(pointsSWRange[global.s_n]), (int)uv1.U16[i + 4]);
}
if (global.s_n == s_n_debug)
{
if (sel.ltf)
{
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv1.U16[i + 4]});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv0.U16[i + 4]});
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv1.U16[i], (int)uv1.U16[i + 4]});
}
else
{
pointsSWDebug.push_back({*primIDSW - 1, x + i, top, (int)uv0.U16[i], (int)uv0.U16[i + 4]});
}
}
}
}
}
#endif
VectorI y0 = uv0.uph16() << (sel.tw + 3);
VectorI x0 = uv0.upl16();

View File

@ -19,6 +19,21 @@ MULTI_ISA_UNSHARED_IMPL;
int GSRasterizerData::s_counter = 0;
#include "debug.h"
#if MY_DEBUG == 1
extern bool savePoints;
extern int s_n_debug;
extern int s_n_exit;
extern int primID;
extern int* primIDSW;
extern std::map<int, std::tuple<int, int, int, int>> pointsHackRange;
extern std::map<int, std::tuple<int, int, int, int>> pointsSWRange;
extern std::vector<std::tuple<int, int, int, int, int>> pointsHackDebug;
extern std::vector<std::tuple<int, int, int, int, int>> pointsSWDebug;
extern std::map<std::tuple<int, int>, std::tuple<double, double, double, double>> pointsHackDebugOrig;
extern std::map<std::tuple<int, int>, std::tuple<double, double, double, double>> pointsSWDebugOrig;
#endif
static int compute_best_thread_height(int threads)
{
// - for more threads screen segments should be smaller to better distribute the pixels
@ -56,6 +71,10 @@ GSRasterizer::GSRasterizer(GSDrawScanline* ds, int id, int threads)
{
m_scanline[i] = (i % threads) == id ? 1 : 0;
}
#if MY_DEBUG == 1
primIDSW = &m_primcount;
#endif
}
GSRasterizer::~GSRasterizer()
@ -597,6 +616,19 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
{
#if MY_DEBUG == 1
if (GSState::s_n == s_n_debug)
{
// FIXME; WHAT IS SCALING FOR UV?
double scaleTX = 1 / (double)(1 << m_local.gd->TW) / 256.0 / 256.0;
double scaleTY = 1 / (double)(1 << m_local.gd->TH) / 256.0 / 256.0;
pointsSWDebugOrig[{m_primcount, 0}] = {vertex[index[0]].p.x, vertex[index[0]].p.y, vertex[index[0]].t.x * scaleTX, vertex[index[0]].t.y * scaleTY};
pointsSWDebugOrig[{m_primcount, 1}] = {vertex[index[1]].p.x, vertex[index[1]].p.y, vertex[index[1]].t.x * scaleTX, vertex[index[1]].t.y * scaleTY};
pointsSWDebugOrig[{m_primcount, 2}] = {vertex[index[2]].p.x, vertex[index[2]].p.y, vertex[index[2]].t.x * scaleTX, vertex[index[2]].t.y * scaleTY};
}
#endif
m_primcount++;
GSVertexSW edge;

View File

@ -10,8 +10,25 @@
MULTI_ISA_UNSHARED_IMPL;
#define USE_HACK 1
#include "debug.h"
#if MY_DEBUG == 1
extern bool savePoints;
extern int s_n_debug;
extern int s_n_exit;
extern std::map<int, std::tuple<int, int, int, int>> pointsHackRange;
extern std::map<int, std::tuple<int, int, int, int>> pointsSWRange;
extern void dumpRanges();
#endif
// Creates a GSRendererSW for the current ISA and returns it as a GSRenderer pointer.
// The object is allocated with `new`.
GSRenderer* CURRENT_ISA::makeGSRendererSW(int threads)
{
#if MY_DEBUG == 1
	// While debug point saving is active, the renderer is created with a thread count of 0
	// regardless of what the caller asked for.
	const int thread_count = savePoints ? 0 : threads;
#else
	const int thread_count = threads;
#endif
	return new GSRendererSW(thread_count);
}
@ -431,8 +448,6 @@ void GSRendererSW::Draw()
sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm());
//
if (GSConfig.DumpGSData)
{
Sync(2);
@ -556,6 +571,11 @@ void GSRendererSW::Queue(GSRingHeap::SharedPtr<GSRasterizerData>& item)
fflush(s_fp);
}
#if MY_DEBUG == 1
sd->global.s_n = s_n;
sd->global.TW = m_context->TEX0.TW;
sd->global.TH = m_context->TEX0.TH;
#endif
m_rl->Queue(item);
// invalidate new parts rendered onto
@ -1057,7 +1077,54 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r;
#if MY_DEBUG == 1
fprintf(stderr, "%d\n", s_n);
#endif
if (m_vt.m_primclass == GS_TRIANGLE_CLASS && USE_HACK)
{
int minU = std::numeric_limits<int>::max();
int minV = std::numeric_limits<int>::max();
int maxU = std::numeric_limits<int>::min();
int maxV = std::numeric_limits<int>::min();
//GetTriangleMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
GetTriangleMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
r = GSVector4i(minU, minV, maxU + 1, maxV + 1);
#if MY_DEBUG == 1
if (s_n == 58)
{
printf("");
}
GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
if (savePoints)
{
pointsHackRange[s_n] = {minU, minV, maxU, maxV};
}
#endif
}
else if (m_vt.m_primclass == GS_SPRITE_CLASS && USE_HACK)
{
int minU = std::numeric_limits<int>::max();
int minV = std::numeric_limits<int>::max();
int maxU = std::numeric_limits<int>::min();
int maxV = std::numeric_limits<int>::min();
//GetSpriteMinMaxUV(1 << TEX0.TW, 1 << TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
GetSpriteMinMaxUV(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH, gd.sel.ltf, minU, minV, maxU, maxV);
r = GSVector4i(minU, minV, maxU + 1, maxV + 1);
#if MY_DEBUG == 1
GSVector4i r2 = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
if (savePoints)
{
pointsHackRange[s_n] = {minU, minV, maxU, maxV};
}
#endif
}
else
{
r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
}
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);

View File

@ -9,6 +9,8 @@
#include <cstdio>
#include <string>
#include "debug.h"
union GSScanlineSelector
{
struct
@ -117,6 +119,12 @@ union GSScanlineSelector
struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a pixel shader constant buffer
{
// TODO: REMOVE DEBUG CODE
#if MY_DEBUG == 1
int s_n = -1;
int TW = -1, TH = -1;
#endif
GSScanlineSelector sel;
// - the data of vm, tex may change, multi-threaded drawing must be finished before that happens, clut and dimx are copies

View File

@ -215,7 +215,8 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
GSVector4i r = rect;
r = r.ralign<Align_Outside>(bs);
// FIXME: Is this an ok rintersect hack to make sure we don't read outside the texture?
r = r.ralign<Align_Outside>(bs).rintersect(GSVector4i(0, 0, tw, th));
if (r.eq(GSVector4i(0, 0, tw, th)))
{

2
pcsx2/debug.h Normal file
View File

@ -0,0 +1,2 @@
#pragma once
// Switch for the UV range debugging instrumentation: code wrapped in `#if MY_DEBUG == 1`
// is compiled only while this is defined as 1.
// In the scanline drawing source, ENABLE_JIT_RASTERIZER is defined in the `#else` branch of
// that check, so it is not defined there while this is 1.
#define MY_DEBUG 1

View File

@ -592,6 +592,7 @@
<ClInclude Include="CDVD\IsoHasher.h" />
<ClInclude Include="CDVD\ThreadedFileReader.h" />
<ClInclude Include="CDVD\zlib_indexed.h" />
<ClInclude Include="debug.h" />
<ClInclude Include="DebugTools\Breakpoints.h" />
<ClInclude Include="DebugTools\DebugInterface.h" />
<ClInclude Include="DebugTools\DisassemblyManager.h" />

View File

@ -220,15 +220,6 @@
<Filter Include="System\Ps2\GS\Shaders\Direct3D">
<UniqueIdentifier>{eb697f5b-85f5-424a-a7e4-8d8b73d3426e}</UniqueIdentifier>
</Filter>
<Filter Include="Tools">
<UniqueIdentifier>{9153e32b-e1e3-49ac-b490-b56adfd1692f}</UniqueIdentifier>
</Filter>
<Filter Include="Tools\Input Recording">
<UniqueIdentifier>{03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938}</UniqueIdentifier>
</Filter>
<Filter Include="Tools\Input Recording\Utilities">
<UniqueIdentifier>{78c9db9c-9c7c-4385-90e7-9fa71b922f60}</UniqueIdentifier>
</Filter>
<Filter Include="System\Ps2\USB\qemu-usb">
<UniqueIdentifier>{e068b724-9319-42e5-9ea7-63d80989ea1d}</UniqueIdentifier>
</Filter>
@ -283,12 +274,21 @@
<Filter Include="System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64">
<UniqueIdentifier>{8aea3ae6-9722-463a-94ac-34f3738a3153}</UniqueIdentifier>
</Filter>
<Filter Include="Tools\arm64">
<UniqueIdentifier>{cf847f4e-744e-4c27-a7ac-8564726fb4e6}</UniqueIdentifier>
</Filter>
<Filter Include="System\Ps2\EmotionEngine\EE\Dynarec\arm64">
<UniqueIdentifier>{cd8ec519-2196-43f7-86de-7faced2d4296}</UniqueIdentifier>
</Filter>
<Filter Include="System\Tools">
<UniqueIdentifier>{9153e32b-e1e3-49ac-b490-b56adfd1692f}</UniqueIdentifier>
</Filter>
<Filter Include="System\Tools\Input Recording">
<UniqueIdentifier>{03ba2aa7-2cd9-48cb-93c6-fc93d5bdc938}</UniqueIdentifier>
</Filter>
<Filter Include="System\Tools\Input Recording\Utilities">
<UniqueIdentifier>{78c9db9c-9c7c-4385-90e7-9fa71b922f60}</UniqueIdentifier>
</Filter>
<Filter Include="System\Tools\arm64">
<UniqueIdentifier>{cf847f4e-744e-4c27-a7ac-8564726fb4e6}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="Docs\License.txt">
@ -1119,19 +1119,19 @@
<Filter>System\Ps2\GS\Renderers\Direct3D12</Filter>
</ClCompile>
<ClCompile Include="Recording\InputRecording.cpp">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClCompile>
<ClCompile Include="Recording\InputRecordingControls.cpp">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClCompile>
<ClCompile Include="Recording\InputRecordingFile.cpp">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClCompile>
<ClCompile Include="Recording\PadData.cpp">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClCompile>
<ClCompile Include="Recording\Utilities\InputRecordingLogger.cpp">
<Filter>Tools\Input Recording\Utilities</Filter>
<Filter>System\Tools\Input Recording\Utilities</Filter>
</ClCompile>
<ClCompile Include="USB\USB.cpp">
<Filter>System\Ps2\USB</Filter>
@ -1233,7 +1233,7 @@
<Filter>System\Ps2\SPU2</Filter>
</ClCompile>
<ClCompile Include="GSDumpReplayer.cpp">
<Filter>Tools</Filter>
<Filter>System\Tools</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\DX11\D3D11ShaderCache.cpp">
<Filter>System\Ps2\GS\Renderers\Direct3D11</Filter>
@ -1302,7 +1302,7 @@
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="PerformanceMetrics.cpp">
<Filter>Tools</Filter>
<Filter>System\Tools</Filter>
</ClCompile>
<ClCompile Include="Input\InputSource.cpp">
<Filter>Misc\Input</Filter>
@ -1329,7 +1329,7 @@
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="Achievements.cpp">
<Filter>Tools</Filter>
<Filter>System\Tools</Filter>
</ClCompile>
<ClCompile Include="Hotkeys.cpp">
<Filter>Misc</Filter>
@ -1432,7 +1432,7 @@
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64</Filter>
</ClCompile>
<ClCompile Include="arm64\AsmHelpers.cpp">
<Filter>Tools\arm64</Filter>
<Filter>System\Tools\arm64</Filter>
</ClCompile>
<ClCompile Include="arm64\RecStubs.cpp">
<Filter>System\Ps2\EmotionEngine\EE\Dynarec\arm64</Filter>
@ -2059,19 +2059,19 @@
<Filter>System\Ps2\GS\Renderers\Direct3D12</Filter>
</ClInclude>
<ClInclude Include="Recording\InputRecording.h">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClInclude>
<ClInclude Include="Recording\InputRecordingControls.h">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClInclude>
<ClInclude Include="Recording\InputRecordingFile.h">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClInclude>
<ClInclude Include="Recording\PadData.h">
<Filter>Tools\Input Recording</Filter>
<Filter>System\Tools\Input Recording</Filter>
</ClInclude>
<ClInclude Include="Recording\Utilities\InputRecordingLogger.h">
<Filter>Tools\Input Recording\Utilities</Filter>
<Filter>System\Tools\Input Recording\Utilities</Filter>
</ClInclude>
<ClInclude Include="ShaderCacheVersion.h">
<Filter>System\Include</Filter>
@ -2191,7 +2191,7 @@
<Filter>System\Ps2\IPU</Filter>
</ClInclude>
<ClInclude Include="GSDumpReplayer.h">
<Filter>Tools</Filter>
<Filter>System\Tools</Filter>
</ClInclude>
<ClInclude Include="GS.h">
<Filter>System\Ps2\GS\GIF</Filter>
@ -2391,7 +2391,7 @@
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec\arm64</Filter>
</ClInclude>
<ClInclude Include="arm64\AsmHelpers.h">
<Filter>Tools\arm64</Filter>
<Filter>System\Tools\arm64</Filter>
</ClInclude>
<ClInclude Include="SIO\Pad\PadJogcon.h">
<Filter>System\Ps2\Iop\SIO\PAD</Filter>
@ -2399,6 +2399,9 @@
<ClInclude Include="SIO\Pad\PadNegcon.h">
<Filter>System\Ps2\Iop\SIO\PAD</Filter>
</ClInclude>
<ClInclude Include="debug.h">
<Filter>System\Ps2\GS</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<CustomBuildStep Include="rdebug\deci2.h">