GPU/HW: Use sized tristrips instead of fullscreen quads

This commit is contained in:
Stenzek 2025-01-07 17:03:26 +10:00
parent 9bc5ffe091
commit b9186139d0
No known key found for this signature in database
6 changed files with 125 additions and 26 deletions

View File

@ -1209,6 +1209,49 @@ bool GPUBackend::ApplyChromaSmoothing()
return true;
}
// Fills in the vertex input layout and primitive type of `config` so that the resulting pipeline
// consumes ScreenVertex data drawn as a triangle strip. Other fields of `config` are left untouched.
void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config)
{
  using VertexAttribute = GPUPipeline::VertexAttribute;

  // Attribute 0 reads the position starting at ScreenVertex::x, attribute 1 reads the texture
  // coordinate starting at ScreenVertex::u. Static storage, since the config only refers to the array.
  static constexpr VertexAttribute s_screen_quad_attributes[] = {
    VertexAttribute::Make(0, VertexAttribute::Semantic::Position, 0, VertexAttribute::Type::Float, 2,
                          OFFSETOF(ScreenVertex, x)),
    VertexAttribute::Make(1, VertexAttribute::Semantic::TexCoord, 0, VertexAttribute::Type::Float, 2,
                          OFFSETOF(ScreenVertex, u)),
  };

  config.primitive = GPUPipeline::Primitive::TriangleStrips;
  config.input_layout.vertex_stride = sizeof(ScreenVertex);
  config.input_layout.vertex_attributes = s_screen_quad_attributes;
}
// Maps a rectangle inside a render target to clip-space coordinates.
//   bounds:  rectangle as (x0, y0, x1, y1), in the same units as rt_size.
//   rt_size: (width, height) of the render target the rectangle lives in.
// Returns (x0, y0, x1, y1) where X is remapped from [0, width] to [-1, 1] and
// Y is remapped from [0, height] to [1, -1].
GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size)
{
  const GSVector2 one = GSVector2::cxpr(1.0f);
  const GSVector2 two = GSVector2::cxpr(2.0f);

  const GSVector2 target_size = GSVector2(rt_size);

  // Regroup the edges as (x0, x1, y0, y1) so each axis can be processed as one pair.
  const GSVector4 edges = GSVector4(bounds.xzyw());

  const GSVector2 clip_x = ((edges.xy() * two) / target_size.xx()) - one;
  const GSVector2 clip_y = one - (two * (edges.zw() / target_size.yy()));

  // (x0, x1, y0, y1) -> (x0, y0, x1, y1)
  return GSVector4::xyxy(clip_x, clip_y).xzyw();
}
void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size,
const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */)
{
const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size);
ScreenVertex* vertices;
u32 space;
u32 base_vertex;
g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast<void**>(&vertices), &space, &base_vertex);
vertices[0].Set(xy.xy(), uv_bounds.xy());
vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy());
vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy());
vertices[3].Set(xy.zw(), uv_bounds.zw());
g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4);
g_gpu_device->Draw(4, base_vertex);
}
void GPUBackend::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
GSVector4i* display_rect, GSVector4i* draw_rect) const
{

View File

@ -118,6 +118,19 @@ protected:
DEINTERLACE_BUFFER_COUNT = 4,
};
/// Vertex format for screen-space quads: a 2D position followed by a 2D texture coordinate.
struct ScreenVertex
{
  // Position.
  float x;
  float y;

  // Texture coordinate.
  float u;
  float v;

  /// Writes position and texture coordinate with a single vector store to this object.
  /// Relies on the four floats above being laid out contiguously in x, y, u, v order.
  ALWAYS_INLINE void Set(const GSVector2& xy, const GSVector2& uv)
  {
    const GSVector4 packed = GSVector4::xyxy(xy, uv);
    // NOTE(review): store<false> is presumably the unaligned store variant - confirm in GSVector.
    GSVector4::store<false>(this, packed);
  }
};
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0;
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering,
u8 interlaced_display_field) = 0;
@ -143,6 +156,12 @@ protected:
virtual bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) = 0;
virtual void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) = 0;
/// Sets the vertex attributes, vertex stride and primitive type (triangle strips) of a pipeline
/// config so it can be used with ScreenVertex data.
static void SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config);
/// Converts a rectangle within a render target of size rt_size to clip-space coordinates,
/// with X mapped to [-1, 1] and Y mapped to [1, -1].
static GSVector4 GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size);
/// Writes a four-vertex quad covering bounds to the device vertex buffer and draws it.
/// uv_bounds provides the texture coordinates for the corners, and defaults to (0, 0)-(1, 1).
void DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size,
const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
/// Helper function for computing the draw rectangle in a larger window.
void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
GSVector4i* display_rect, GSVector4i* draw_rect) const;

View File

@ -1057,6 +1057,15 @@ bool GPU_HW::CompileCommonShaders(Error* error)
if (!m_fullscreen_quad_vertex_shader)
return false;
GL_OBJECT_NAME(m_fullscreen_quad_vertex_shader, "Fullscreen Quad Vertex Shader");
m_screen_quad_vertex_shader = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
shadergen.GenerateScreenVertexShader(), error);
if (!m_screen_quad_vertex_shader)
return false;
GL_OBJECT_NAME(m_screen_quad_vertex_shader, "Screen Quad Vertex Shader");
return true;
}
@ -1538,12 +1547,11 @@ bool GPU_HW::CompilePipelines(Error* error)
batch_shader_guard.Run();
// common state
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
SetScreenQuadInputLayout(plconfig);
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
// VRAM fill
@ -1631,8 +1639,6 @@ bool GPU_HW::CompilePipelines(Error* error)
}
}
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
// VRAM write replacement
{
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
@ -1641,6 +1647,7 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1649,6 +1656,11 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
}
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
// VRAM update depth
if (m_write_mask_as_depth)
{
@ -1954,6 +1966,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
void GPU_HW::UpdateDepthBufferFromMaskBit()
{
GL_SCOPE_FMT("UpdateDepthBufferFromMaskBit()");
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
// Viewport should already be set full, only need to fudge the scissor.
@ -2997,9 +3010,10 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
g_gpu_device->Draw(3, 0);
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
g_gpu_device->SetScissor(rect);
DrawScreenQuad(rect, m_vram_texture->GetSizeVec());
RestoreDeviceContext();
return true;
}
@ -3225,9 +3239,6 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get());
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetViewportAndScissor(scaled_bounds);
struct VRAMFillUBOData
{
u32 u_dst_x;
@ -3247,7 +3258,10 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
uniforms.u_interlaced_displayed_field = active_line_lsb;
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds);
DrawScreenQuad(scaled_bounds, m_vram_texture->GetSizeVec());
RestoreDeviceContext();
}
@ -3357,14 +3371,15 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
{
DeactivateROV();
std::unique_ptr<GPUTexture> upload_texture;
GPUDevice::AutoRecycleTexture upload_texture;
u32 map_index;
if (!g_gpu_device->GetFeatures().supports_texture_buffers)
{
map_index = 0;
upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch);
upload_texture =
g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U,
GPUTexture::Flags::None, data, data_pitch);
if (!upload_texture)
{
ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
@ -3406,21 +3421,17 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
GetCurrentNormalizedVertexDepth()};
// the viewport should already be set to the full vram, so just adjust the scissor
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (upload_texture)
{
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->Draw(3, 0);
g_gpu_device->RecycleTexture(std::move(upload_texture));
}
else
{
g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
g_gpu_device->Draw(3, 0);
}
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds);
DrawScreenQuad(scaled_bounds, m_vram_texture->GetSizeVec());
RestoreDeviceContext();
}
@ -3492,12 +3503,13 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
GetCurrentNormalizedVertexDepth()};
// VRAM read texture should already be bound.
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(dst_bounds_scaled);
DrawScreenQuad(dst_bounds_scaled, m_vram_texture->GetSizeVec());
RestoreDeviceContext();
if (check_mask && !m_pgxp_depth_buffer)

View File

@ -223,6 +223,7 @@ private:
bool ShouldCheckForTexPageOverlap() const;
bool IsFlushed() const;
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
void PrepareDraw(const GPUBackendDrawCommand* cmd);
@ -380,4 +381,5 @@ private:
// common shaders
std::unique_ptr<GPUShader> m_fullscreen_quad_vertex_shader;
std::unique_ptr<GPUShader> m_screen_quad_vertex_shader;
};

View File

@ -50,6 +50,27 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) const
false);
}
// Generates the source of the vertex shader used for screen quads.
// The shader takes a float2 position (a_pos) and a float2 texture coordinate (a_tex0),
// outputs the position with Z and W both set to 1, and passes the texture coordinate through.
// The output Y is negated when any of API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
std::string GPU_HW_ShaderGen::GenerateScreenVertexShader() const
{
std::stringstream ss;
WriteHeader(ss);
// Declares the a_pos/a_tex0 inputs. NOTE(review): the meaning of the remaining arguments is
// defined by DeclareVertexEntryPoint, which is not visible here - confirm before changing them.
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false);
// Shader body; everything inside the raw string is emitted verbatim.
ss << R"(
{
// Depth set to 1 for PGXP depth buffer.
v_pos = float4(a_pos, 1.0f, 1.0f);
v_tex0 = a_tex0;
// NDC space Y flip in Vulkan.
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading,
bool textured, bool palette, bool page_texture, bool uv_limits,
bool force_round_texcoords, bool pgxp_depth,

View File

@ -13,6 +13,8 @@ public:
GPU_HW_ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch);
~GPU_HW_ShaderGen();
/// Returns the source for the vertex shader that takes a float2 position and float2 texture coordinate.
std::string GenerateScreenVertexShader() const;
std::string GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading, bool textured, bool palette,
bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth,
bool disable_color_perspective) const;