GPU: Move vertex culling to GPU thread

That is, push every primitive through to the GPU thread unless it is
oversized; oversized primitives are ones the GPU will definitely skip.

This is needed because of coordinate truncation in Final Fantasy VIII; the
affected scenes will now render correctly with the software renderer again.
This commit is contained in:
Stenzek 2024-12-28 14:46:22 +10:00
parent 58b0ccf3fc
commit c99625e4c3
No known key found for this signature in database
7 changed files with 167 additions and 96 deletions

View File

@ -326,8 +326,10 @@ private:
AddCommandTicks(pixels);
}
ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i clamped_rect, bool textured, bool semitransparent)
ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent)
{
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();
@ -373,8 +375,9 @@ private:
AddCommandTicks(ticks_per_row * drawn_height);
}
ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i clamped_rect, bool shaded)
ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i rect, bool shaded)
{
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();

View File

@ -97,6 +97,8 @@ GPUBackend::~GPUBackend()
bool GPUBackend::Initialize(bool clear_vram, Error* error)
{
m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area);
if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error))
return false;
@ -451,9 +453,9 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd)
case GPUBackendCommandType::SetDrawingArea:
{
FlushRender();
const GPUBackendSetDrawingAreaCommand* ccmd = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd);
GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area;
m_clamped_drawing_area = GPU::GetClampedDrawingArea(ccmd->new_area);
DrawingAreaChanged();
}
break;

View File

@ -174,6 +174,8 @@ protected:
void DestroyDeinterlaceTextures();
bool ApplyChromaSmoothing();
GSVector4i m_clamped_drawing_area = {};
s32 m_display_width = 0;
s32 m_display_height = 0;
s32 m_display_origin_left = 0;

View File

@ -428,22 +428,20 @@ bool GPU::HandleRenderPolygonCommand()
}
// Cull polygons which are too large.
const GSVector2 v0f = GSVector2::load<false>(&cmd->vertices[0].x);
const GSVector2 v1f = GSVector2::load<false>(&cmd->vertices[1].x);
const GSVector2 v2f = GSVector2::load<false>(&cmd->vertices[2].x);
const GSVector2 min_pos_12 = v1f.min(v2f);
const GSVector2 max_pos_12 = v1f.max(v2f);
const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector2i v0 = GSVector2i::load<false>(&cmd->vertices[0].native_x);
const GSVector2i v1 = GSVector2i::load<false>(&cmd->vertices[1].native_x);
const GSVector2i v2 = GSVector2i::load<false>(&cmd->vertices[2].native_x);
const GSVector2i min_pos_12 = v1.min_s32(v2);
const GSVector2i max_pos_12 = v1.max_s32(v2);
const GSVector4i draw_rect_012 =
GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const bool first_tri_culled =
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_012.rintersects(m_clamped_drawing_area));
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT);
if (first_tri_culled)
{
// TODO: GPU events... somehow.
DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x,
cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y,
cmd->vertices[2].native_x, cmd->vertices[2].native_y);
DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, cmd->vertices[0].native_y,
cmd->vertices[1].native_x, cmd->vertices[1].native_y, cmd->vertices[2].native_x,
cmd->vertices[2].native_y);
if (!rc.quad_polygon)
{
@ -462,19 +460,19 @@ bool GPU::HandleRenderPolygonCommand()
// quads
if (rc.quad_polygon)
{
const GSVector2 v3f = GSVector2::load<false>(&cmd->vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
const GSVector2i v3 = GSVector2i::load<false>(&cmd->vertices[3].native_x);
const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3))
.upl64(GSVector4i(max_pos_12.max_s32(v3)))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
// Cull polygons which are too large.
const bool second_tri_culled =
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_123.rintersects(m_clamped_drawing_area));
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT);
if (second_tri_culled)
{
DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}",
cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x,
cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y);
DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].native_x,
cmd->vertices[2].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y,
cmd->vertices[3].native_x, cmd->vertices[3].native_y);
if (first_tri_culled)
{
@ -483,9 +481,6 @@ bool GPU::HandleRenderPolygonCommand()
}
// Remove second part of quad.
// NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no
// longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it
// generally looks better this way, so I'm keeping it.
cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) +
3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex));
cmd->num_vertices = 3;
@ -540,11 +535,10 @@ bool GPU::HandleRenderPolygonCommand()
const GSVector4i draw_rect_012 =
GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const bool first_tri_culled =
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_012.rintersects(m_clamped_drawing_area));
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT);
if (first_tri_culled)
{
DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);
if (!rc.quad_polygon)
@ -568,12 +562,11 @@ bool GPU::HandleRenderPolygonCommand()
// Cull polygons which are too large.
const bool second_tri_culled =
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_123.rintersects(m_clamped_drawing_area));
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT);
if (second_tri_culled)
{
DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x,
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y);
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[3].x, cmd->vertices[3].y);
if (first_tri_culled)
{
@ -681,15 +674,7 @@ bool GPU::HandleRenderRectangleCommand()
}
const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height);
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
if (clamped_rect.rempty()) [[unlikely]]
{
DEBUG_LOG("Culling off-screen rectangle {}", rect);
EndCommand();
return true;
}
AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable);
AddDrawRectangleTicks(rect, rc.texture_enable, rc.transparency_enable);
GPUBackend::PushCommand(cmd);
EndCommand();
@ -883,15 +868,13 @@ void GPU::FinishPolyline()
const GSVector2 end_pos = GSVector2::load<false>(&end.x);
const GSVector4i rect =
GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT)
{
DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
}
else
{
AddDrawLineTicks(clamped_rect, m_render_command.shading_enable);
AddDrawLineTicks(rect, m_render_command.shading_enable);
cmd->vertices[out_vertex_count++] = start;
cmd->vertices[out_vertex_count++] = end;
@ -930,15 +913,13 @@ void GPU::FinishPolyline()
const GSVector4i rect =
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT)
{
DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
}
else
{
AddDrawLineTicks(clamped_rect, m_render_command.shading_enable);
AddDrawLineTicks(rect, m_render_command.shading_enable);
GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count];
out_vertex_count += 2;

View File

@ -296,7 +296,7 @@ bool GPU_HW::Initialize(bool upload_vram, Error* error)
if (upload_vram)
UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT);
DrawingAreaChanged();
m_drawing_area_changed = true;
return true;
}
@ -2494,7 +2494,12 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
const GSVector4i rect =
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty());
DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT);
if (clamped_rect.rempty())
{
GL_INS_FMT("Culling off-screen line {} => {}", start_pos, end_pos);
continue;
}
AddDrawnRectangle(clamped_rect);
DrawLine(GSVector4(bounds), start_color, end_color, depth);
@ -2533,7 +2538,11 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
const GSVector4i rect =
GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty());
if (clamped_rect.rempty())
{
GL_INS_FMT("Culling off-screen line {} => {}", start_pos, end_pos);
continue;
}
AddDrawnRectangle(clamped_rect);
DrawLine(bounds, start_color, end_color, depth);
@ -2658,6 +2667,16 @@ void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth)
void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
{
const GSVector2i pos = GSVector2i::load<true>(&cmd->x);
const GSVector2i size = GSVector2i::load<true>(&cmd->width).u16to32();
const GSVector4i rect = GSVector4i::xyxy(pos, pos.add32(size));
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
if (clamped_rect.rempty())
{
GL_INS_FMT("Culling off-screen sprite {}", rect);
return;
}
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
SetBatchSpriteMode(cmd, m_allow_sprite_mode);
@ -2673,11 +2692,6 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
const u32 rectangle_width = cmd->width;
const u32 rectangle_height = cmd->height;
const GSVector4i rect =
GSVector4i(pos_x, pos_y, pos_x + static_cast<s32>(rectangle_width), pos_y + static_cast<s32>(rectangle_height));
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
DebugAssert(!clamped_rect.rempty());
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u32 tex_top = orig_tex_top;
for (u32 y_offset = 0; y_offset < rectangle_height;)
@ -2747,15 +2761,12 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
// TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below.
const float depth = GetCurrentNormalizedVertexDepth();
const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable);
const u32 num_vertices = cmd->num_vertices;
const u32 texpage = m_draw_mode.bits;
const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
std::array<BatchVertex, 4> vertices;
u32 num_vertices = cmd->num_vertices;
for (u32 i = 0; i < num_vertices; i++)
{
const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i];
@ -2764,7 +2775,13 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
vert.texcoord, 0xFFFF0000u);
}
FinishPolygonDraw(cmd, vertices, num_vertices, false, false);
GSVector4i clamped_draw_rect_012, clamped_draw_rect_123;
if (BeginPolygonDraw(cmd, vertices, num_vertices, clamped_draw_rect_012, clamped_draw_rect_123))
{
SetBatchDepthBuffer(cmd, false);
FinishPolygonDraw(cmd, vertices, num_vertices, false, false, clamped_draw_rect_012, clamped_draw_rect_123);
}
if (ShouldDrawWithSoftwareRenderer())
{
@ -2778,14 +2795,12 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd)
{
PrepareDraw(cmd);
// TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below.
const float depth = GetCurrentNormalizedVertexDepth();
const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable);
const u32 num_vertices = cmd->num_vertices;
const u32 texpage = m_draw_mode.bits;
const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
std::array<BatchVertex, 4> vertices;
u32 num_vertices = cmd->num_vertices;
for (u32 i = 0; i < num_vertices; i++)
{
const GPUBackendDrawPrecisePolygonCommand::Vertex& vert = cmd->vertices[i];
@ -2793,14 +2808,18 @@ void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd)
vert.texcoord, 0xFFFF0000u);
}
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
SetBatchDepthBuffer(cmd, use_depth);
if (use_depth)
CheckForDepthClear(cmd, vertices.data(), num_vertices);
GSVector4i clamped_draw_rect_012, clamped_draw_rect_123;
if (BeginPolygonDraw(cmd, vertices, num_vertices, clamped_draw_rect_012, clamped_draw_rect_123))
{
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
SetBatchDepthBuffer(cmd, use_depth);
if (use_depth)
CheckForDepthClear(cmd, vertices.data(), num_vertices);
// Use PGXP to exclude primitives that are definitely 3D.
const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w);
FinishPolygonDraw(cmd, vertices, num_vertices, true, is_3d);
// Use PGXP to exclude primitives that are definitely 3D.
const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w);
FinishPolygonDraw(cmd, vertices, num_vertices, true, is_3d, clamped_draw_rect_012, clamped_draw_rect_123);
}
if (ShouldDrawWithSoftwareRenderer())
{
@ -2820,16 +2839,11 @@ void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd)
}
}
ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd,
std::array<BatchVertex, 4>& vertices, u32 num_vertices,
bool is_precise, bool is_3d)
ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand* cmd,
std::array<BatchVertex, 4>& vertices, u32& num_vertices,
GSVector4i& clamped_draw_rect_012,
GSVector4i& clamped_draw_rect_123)
{
// Use PGXP to exclude primitives that are definitely 3D.
if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon)
HandleFlippedQuadTextureCoordinates(cmd, vertices.data());
else if (m_allow_sprite_mode)
SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data()));
const GSVector2 v0f = GSVector2::load<false>(&vertices[0].x);
const GSVector2 v1f = GSVector2::load<false>(&vertices[1].x);
const GSVector2 v2f = GSVector2::load<false>(&vertices[2].x);
@ -2837,9 +2851,69 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand
const GSVector2 max_pos_12 = v1f.max(v2f);
const GSVector4i draw_rect_012 =
GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area);
DebugAssert(draw_rect_012.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_012.height() <= MAX_PRIMITIVE_HEIGHT &&
!clamped_draw_rect_012.rempty());
clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area);
const bool first_tri_culled = clamped_draw_rect_012.rempty();
if (first_tri_culled)
{
GL_INS_FMT("Culling off-screen polygon: {},{} {},{} {},{}", vertices[0].x, vertices[0].y, vertices[1].y,
vertices[1].x, vertices[2].y, vertices[2].y);
if (num_vertices != 4)
return false;
}
if (num_vertices == 4)
{
const GSVector2 v3f = GSVector2::load<false>(&vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area);
const bool second_tri_culled = clamped_draw_rect_123.rempty();
if (second_tri_culled)
{
GL_INS_FMT("Culling off-screen polygon (quad second half): {},{} {},{} {},{}", vertices[2].x, vertices[2].y,
vertices[1].x, vertices[1].y, vertices[3].x, vertices[3].y);
if (first_tri_culled)
{
// both parts culled
return false;
}
// Remove second part of quad.
// NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no
// longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it
// generally looks better this way, so I'm keeping it.
num_vertices = 3;
}
else
{
// If first part was culled, move the second part to the first.
if (first_tri_culled)
{
clamped_draw_rect_012 = clamped_draw_rect_123;
std::memcpy(&vertices[0], &vertices[2], sizeof(BatchVertex));
std::memcpy(&vertices[2], &vertices[3], sizeof(BatchVertex));
num_vertices = 3;
}
}
}
PrepareDraw(cmd);
return true;
}
ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd,
std::array<BatchVertex, 4>& vertices, u32 num_vertices,
bool is_precise, bool is_3d,
const GSVector4i clamped_draw_rect_012,
const GSVector4i clamped_draw_rect_123)
{
// Use PGXP to exclude primitives that are definitely 3D.
if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon)
HandleFlippedQuadTextureCoordinates(cmd, vertices.data());
else if (m_allow_sprite_mode)
SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data()));
if (cmd->texture_enable && m_compute_uv_range)
ComputePolygonUVLimits(cmd, vertices.data(), num_vertices);
@ -2864,12 +2938,6 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand
// quads, use num_vertices here, because the first half might be culled
if (num_vertices == 4)
{
const GSVector2 v3f = GSVector2::load<false>(&vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area);
DebugAssert(draw_rect_123.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_123.height() <= MAX_PRIMITIVE_HEIGHT &&
!clamped_draw_rect_123.rempty());
AddDrawnRectangle(clamped_draw_rect_123);
DebugAssert(m_batch_index_space >= 3);
@ -3723,7 +3791,7 @@ void GPU_HW::FlushRender()
void GPU_HW::DrawingAreaChanged()
{
m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area);
FlushRender();
m_drawing_area_changed = true;
}

View File

@ -227,8 +227,11 @@ private:
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
void PrepareDraw(const GPUBackendDrawCommand* cmd);
bool BeginPolygonDraw(const GPUBackendDrawCommand* cmd, std::array<BatchVertex, 4>& vertices, u32& num_vertices,
GSVector4i& clamped_draw_rect_012, GSVector4i& clamped_draw_rect_123);
void FinishPolygonDraw(const GPUBackendDrawCommand* cmd, std::array<BatchVertex, 4>& vertices, u32 num_vertices,
bool is_precise, bool is_3d);
bool is_precise, bool is_3d, const GSVector4i clamped_draw_rect_012,
const GSVector4i clamped_draw_rect_123);
void ResetBatchVertexDepth();
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
@ -325,7 +328,6 @@ private:
BatchUBOData m_batch_ubo_data = {};
// Bounding box of VRAM area that the GPU has drawn into.
GSVector4i m_clamped_drawing_area = {};
GSVector4i m_vram_dirty_draw_rect = INVALID_RECT;
GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero.
GSVector4i m_current_uv_rect = INVALID_RECT;

View File

@ -12,6 +12,7 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/gsvector_formatter.h"
#include "common/intrin.h"
#include "common/log.h"
@ -135,6 +136,18 @@ void GPU_SW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd)
void GPU_SW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
{
// Sprite coordinates are truncated in the GPU class, so it's safe to cull them here.
// Probably wrong, but if we ever change it, this should be removed.
const GSVector2i pos = GSVector2i::load<true>(&cmd->x);
const GSVector2i size = GSVector2i::load<true>(&cmd->width).u16to32();
const GSVector4i rect = GSVector4i::xyxy(pos, pos.add32(size));
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
if (clamped_rect.rempty())
{
DEBUG_LOG("Culling off-screen sprite {}", rect);
return;
}
const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction =
GPU_SW_Rasterizer::GetDrawRectangleFunction(cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable);