Software: Use new scissor logic

Unlike the hardware backends, the software renderer can use multiple scissor rectangles (though this will result in extra rasterization).
This commit is contained in:
Pokechu22 2021-11-09 18:38:24 -08:00
parent 076392a0f6
commit 925ceab82f
5 changed files with 66 additions and 48 deletions

View File

@ -307,7 +307,7 @@ void ProcessTriangle(OutputVertexData* v0, OutputVertexData* v1, OutputVertexDat
PerspectiveDivide(v0);
PerspectiveDivide(v1);
PerspectiveDivide(v2);
Rasterizer::UpdateZSlope(v0, v1, v2);
Rasterizer::UpdateZSlope(v0, v1, v2, bpmem.scissorOffset.x * 2, bpmem.scissorOffset.y * 2);
INCSTAT(g_stats.this_frame.num_triangles_culled)
return;
}
@ -320,7 +320,7 @@ void ProcessTriangle(OutputVertexData* v0, OutputVertexData* v1, OutputVertexDat
PerspectiveDivide(v0);
PerspectiveDivide(v2);
PerspectiveDivide(v1);
Rasterizer::UpdateZSlope(v0, v2, v1);
Rasterizer::UpdateZSlope(v0, v2, v1, bpmem.scissorOffset.x * 2, bpmem.scissorOffset.y * 2);
INCSTAT(g_stats.this_frame.num_triangles_culled)
return;
}
@ -533,10 +533,8 @@ void PerspectiveDivide(OutputVertexData* vertex)
Vec3& screen = vertex->screenPosition;
float wInverse = 1.0f / projected.w;
screen.x =
projected.x * wInverse * xfmem.viewport.wd + xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2;
screen.y =
projected.y * wInverse * xfmem.viewport.ht + xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2;
screen.x = projected.x * wInverse * xfmem.viewport.wd + xfmem.viewport.xOrig;
screen.y = projected.y * wInverse * xfmem.viewport.ht + xfmem.viewport.yOrig;
screen.z = projected.z * wInverse * xfmem.viewport.zRange + xfmem.viewport.farZ;
}
} // namespace Clipper

View File

@ -5,11 +5,16 @@
#include <algorithm>
#include <cstring>
#include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/NativeVertexFormat.h"
#include "VideoBackends/Software/Tev.h"
#include "VideoCommon/BPFunctions.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VideoCommon.h"
@ -23,14 +28,14 @@ static constexpr int BLOCK_SIZE = 2;
struct SlopeContext
{
SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2,
s32 x0, s32 y0)
s32 x0, s32 y0, s32 x_off, s32 y_off)
: x0(x0), y0(y0)
{
// adjust a little less than 0.5
const float adjust = 0.495f;
xOff = ((float)x0 - v0->screenPosition.x) + adjust;
yOff = ((float)y0 - v0->screenPosition.y) + adjust;
xOff = ((float)x0 - (v0->screenPosition.x - x_off)) + adjust;
yOff = ((float)y0 - (v0->screenPosition.y - y_off)) + adjust;
dx10 = v1->screenPosition.x - v0->screenPosition.x;
dx20 = v2->screenPosition.x - v0->screenPosition.x;
@ -99,6 +104,8 @@ static Slope TexSlopes[8][3];
static Tev tev;
static RasterBlock rasterBlock;
static std::vector<BPFunctions::ScissorRect> scissors;
void Init()
{
tev.Init();
@ -108,6 +115,11 @@ void Init()
ZSlope = Slope();
}
// Rebuilds the cached list of scissor rectangles from the result of
// BPFunctions::ComputeScissorRects().
void ScissorChanged()
{
  auto computed = BPFunctions::ComputeScissorRects();
  scissors = std::move(computed.m_result);
}
// Returns approximation of log2(f) in s28.4
// results are close enough to use for LOD
static s32 FixedLog2(float f)
@ -302,37 +314,36 @@ static void BuildBlock(s32 blockX, s32 blockY)
}
// Recomputes the shared ZSlope from the screen-space z values of the three vertices.
// x_off and y_off are subtracted from v0's screen position before it is scaled by 16 and rounded
// to derive the x0/y0 handed to SlopeContext. Nothing is recomputed while
// bpmem.genMode.zfreeze is set, so ZSlope keeps whatever value it last had.
void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2)
const OutputVertexData* v2, s32 x_off, s32 y_off)
{
if (!bpmem.genMode.zfreeze)
{
const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9;
const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9;
const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
const s32 X1 = iround(16.0f * (v0->screenPosition.x - x_off)) - 9;
const s32 Y1 = iround(16.0f * (v0->screenPosition.y - y_off)) - 9;
const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4, x_off, y_off);
ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx);
}
}
void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2)
static void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2,
const BPFunctions::ScissorRect& scissor)
{
INCSTAT(g_stats.this_frame.num_triangles_drawn);
// The zslope should be updated now, even if the triangle is rejected by the scissor test, as
// zfreeze depends on it
UpdateZSlope(v0, v1, v2);
UpdateZSlope(v0, v1, v2, scissor.x_off, scissor.y_off);
// adapted from http://devmaster.net/posts/6145/advanced-rasterization
// 28.4 fixed-point coordinates. rounded to nearest and adjusted to match hardware output
// could also take floor and adjust -8
const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9;
const s32 Y2 = iround(16.0f * v1->screenPosition[1]) - 9;
const s32 Y3 = iround(16.0f * v2->screenPosition[1]) - 9;
const s32 Y1 = iround(16.0f * (v0->screenPosition.y - scissor.y_off)) - 9;
const s32 Y2 = iround(16.0f * (v1->screenPosition.y - scissor.y_off)) - 9;
const s32 Y3 = iround(16.0f * (v2->screenPosition.y - scissor.y_off)) - 9;
const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9;
const s32 X2 = iround(16.0f * v1->screenPosition[0]) - 9;
const s32 X3 = iround(16.0f * v2->screenPosition[0]) - 9;
const s32 X1 = iround(16.0f * (v0->screenPosition.x - scissor.x_off)) - 9;
const s32 X2 = iround(16.0f * (v1->screenPosition.x - scissor.x_off)) - 9;
const s32 X3 = iround(16.0f * (v2->screenPosition.x - scissor.x_off)) - 9;
// Deltas
const s32 DX12 = X1 - X2;
@ -359,35 +370,22 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
s32 maxy = (std::max(std::max(Y1, Y2), Y3) + 0xF) >> 4;
// scissor
s32 xoff = bpmem.scissorOffset.x * 2;
s32 yoff = bpmem.scissorOffset.y * 2;
ASSERT(scissor.rect.left >= 0);
ASSERT(scissor.rect.right <= EFB_WIDTH);
ASSERT(scissor.rect.top >= 0);
ASSERT(scissor.rect.bottom <= EFB_HEIGHT);
s32 scissorLeft = bpmem.scissorTL.x - xoff;
if (scissorLeft < 0)
scissorLeft = 0;
s32 scissorTop = bpmem.scissorTL.y - yoff;
if (scissorTop < 0)
scissorTop = 0;
s32 scissorRight = bpmem.scissorBR.x - xoff + 1;
if (scissorRight > s32(EFB_WIDTH))
scissorRight = EFB_WIDTH;
s32 scissorBottom = bpmem.scissorBR.y - yoff + 1;
if (scissorBottom > s32(EFB_HEIGHT))
scissorBottom = EFB_HEIGHT;
minx = std::max(minx, scissorLeft);
maxx = std::min(maxx, scissorRight);
miny = std::max(miny, scissorTop);
maxy = std::min(maxy, scissorBottom);
minx = std::max(minx, scissor.rect.left);
maxx = std::min(maxx, scissor.rect.right);
miny = std::max(miny, scissor.rect.top);
maxy = std::min(maxy, scissor.rect.bottom);
if (minx >= maxx || miny >= maxy)
return;
// Set up the remaining slopes
const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4, scissor.x_off,
scissor.y_off);
float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w,
1.0f / v2->projectedPosition.w};
@ -504,4 +502,13 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
}
}
}
void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2)
{
INCSTAT(g_stats.this_frame.num_triangles_drawn);
for (const auto& scissor : scissors)
DrawTriangleFrontFace(v0, v1, v2, scissor);
}
} // namespace Rasterizer

View File

@ -10,9 +10,10 @@ struct OutputVertexData;
namespace Rasterizer
{
void Init();
void ScissorChanged();
void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2);
const OutputVertexData* v2, s32 x_off, s32 y_off);
void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2);

View File

@ -12,6 +12,7 @@
#include "VideoBackends/Software/EfbCopy.h"
#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/Rasterizer.h"
#include "VideoBackends/Software/SWBoundingBox.h"
#include "VideoBackends/Software/SWOGLWindow.h"
#include "VideoBackends/Software/SWTexture.h"
@ -179,4 +180,13 @@ SWRenderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
return std::make_unique<NativeVertexFormat>(vtx_decl);
}
// Renderer override invoked when the scissor rectangle is set. It refreshes the rasterizer's
// scissor state via Rasterizer::ScissorChanged(); the rectangle passed in as `rc` is not read.
void SWRenderer::SetScissorRect(const MathUtil::Rectangle<int>& rc)
{
// BPFunctions calls SetScissorRect with the "best" scissor rect whenever the viewport or scissor
// changes. However, the software renderer is actually able to use multiple scissor rects (which
// is necessary in a few rendering edge cases, such as with Major Minor's Majestic March).
// Thus, we use this as a signal to update the list of scissor rects, but ignore the parameter.
Rasterizer::ScissorChanged();
}
} // namespace SW

View File

@ -58,6 +58,8 @@ public:
const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
protected:
std::unique_ptr<BoundingBox> CreateBoundingBox() const override;