GPU/HW: Use sized tristrips instead of fullscreen quads
This commit is contained in:
parent
9bc5ffe091
commit
b9186139d0
|
@ -1209,6 +1209,49 @@ bool GPUBackend::ApplyChromaSmoothing()
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Configures `config` so that a pipeline built from it consumes ScreenVertex data:
/// sets the vertex attribute list, the vertex stride and the primitive type.
/// No other field of `config` is touched here.
void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config)
{
  using Attribute = GPUPipeline::VertexAttribute;

  // Attribute 0 is the position, located at ScreenVertex::x; attribute 1 is the texture
  // coordinate, located at ScreenVertex::u. Both are declared as Type::Float with a count of 2.
  static constexpr Attribute s_screen_attributes[] = {
    Attribute::Make(0, Attribute::Semantic::Position, 0, Attribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)),
    Attribute::Make(1, Attribute::Semantic::TexCoord, 0, Attribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)),
  };

  // Screen quads are submitted as triangle strips of tightly packed ScreenVertex entries.
  config.primitive = GPUPipeline::Primitive::TriangleStrips;
  config.input_layout.vertex_stride = sizeof(ScreenVertex);
  config.input_layout.vertex_attributes = s_screen_attributes;
}
|
||||
|
||||
/// Converts a rectangle given in render-target pixels into clip-space coordinates.
///
/// X is mapped as (2 * px / width) - 1 and Y as 1 - (2 * py / height), so pixel row 0
/// lands on y = +1 and pixel row `height` lands on y = -1.
///
/// @param bounds  Rectangle whose x/z components are the horizontal edges and whose y/w
///                components are the vertical edges, in pixels.
/// @param rt_size Render target size in pixels (x = width, y = height).
/// @return The converted edges, in the same component order as `bounds`.
GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size)
{
  const GSVector2 one = GSVector2::cxpr(1.0f);
  const GSVector2 two = GSVector2::cxpr(2.0f);
  const GSVector2 target_size = GSVector2(rt_size);

  // Regroup the edges as (x0, x1, y0, y1) so each axis can be processed as one 2-wide vector.
  const GSVector4 grouped_edges = GSVector4(bounds.xzyw());
  const GSVector2 horizontal_edges = grouped_edges.xy();
  const GSVector2 vertical_edges = grouped_edges.zw();

  const GSVector2 clip_x = ((horizontal_edges * two) / target_size.xx()) - one;
  const GSVector2 clip_y = one - (two * (vertical_edges / target_size.yy()));

  // Interleave back from (x0, x1, y0, y1) to (x0, y0, x1, y1).
  return GSVector4::xyxy(clip_x, clip_y).xzyw();
}
|
||||
|
||||
void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size,
|
||||
const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */)
|
||||
{
|
||||
const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size);
|
||||
|
||||
ScreenVertex* vertices;
|
||||
u32 space;
|
||||
u32 base_vertex;
|
||||
g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast<void**>(&vertices), &space, &base_vertex);
|
||||
|
||||
vertices[0].Set(xy.xy(), uv_bounds.xy());
|
||||
vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy());
|
||||
vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy());
|
||||
vertices[3].Set(xy.zw(), uv_bounds.zw());
|
||||
|
||||
g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4);
|
||||
g_gpu_device->Draw(4, base_vertex);
|
||||
}
|
||||
|
||||
void GPUBackend::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
|
||||
GSVector4i* display_rect, GSVector4i* draw_rect) const
|
||||
{
|
||||
|
|
|
@ -118,6 +118,19 @@ protected:
|
|||
DEINTERLACE_BUFFER_COUNT = 4,
|
||||
};
|
||||
|
||||
struct ScreenVertex
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
float u;
|
||||
float v;
|
||||
|
||||
ALWAYS_INLINE void Set(const GSVector2& xy, const GSVector2& uv)
|
||||
{
|
||||
GSVector4::store<false>(this, GSVector4::xyxy(xy, uv));
|
||||
}
|
||||
};
|
||||
|
||||
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0;
|
||||
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering,
|
||||
u8 interlaced_display_field) = 0;
|
||||
|
@ -143,6 +156,12 @@ protected:
|
|||
virtual bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) = 0;
|
||||
virtual void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) = 0;
|
||||
|
||||
static void SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config);
|
||||
static GSVector4 GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size);
|
||||
|
||||
void DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size,
|
||||
const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
|
||||
|
||||
/// Helper function for computing the draw rectangle in a larger window.
|
||||
void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
|
||||
GSVector4i* display_rect, GSVector4i* draw_rect) const;
|
||||
|
|
|
@ -1057,6 +1057,15 @@ bool GPU_HW::CompileCommonShaders(Error* error)
|
|||
if (!m_fullscreen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_fullscreen_quad_vertex_shader, "Fullscreen Quad Vertex Shader");
|
||||
|
||||
m_screen_quad_vertex_shader = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
|
||||
shadergen.GenerateScreenVertexShader(), error);
|
||||
if (!m_screen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_screen_quad_vertex_shader, "Screen Quad Vertex Shader");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1538,12 +1547,11 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
batch_shader_guard.Run();
|
||||
|
||||
// common state
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
SetScreenQuadInputLayout(plconfig);
|
||||
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.per_sample_shading = false;
|
||||
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
|
||||
|
||||
// VRAM fill
|
||||
|
@ -1631,8 +1639,6 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
}
|
||||
}
|
||||
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
|
||||
// VRAM write replacement
|
||||
{
|
||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
||||
|
@ -1641,6 +1647,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
@ -1649,6 +1656,11 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
return false;
|
||||
}
|
||||
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.primitive = GPUPipeline::Primitive::Triangles;
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
|
||||
// VRAM update depth
|
||||
if (m_write_mask_as_depth)
|
||||
{
|
||||
|
@ -1954,6 +1966,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
|
|||
|
||||
void GPU_HW::UpdateDepthBufferFromMaskBit()
|
||||
{
|
||||
GL_SCOPE_FMT("UpdateDepthBufferFromMaskBit()");
|
||||
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
|
||||
|
||||
// Viewport should already be set full, only need to fudge the scissor.
|
||||
|
@ -2997,9 +3010,10 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
|
|||
|
||||
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
|
||||
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
|
||||
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
|
||||
g_gpu_device->SetScissor(rect);
|
||||
DrawScreenQuad(rect, m_vram_texture->GetSizeVec());
|
||||
RestoreDeviceContext();
|
||||
return true;
|
||||
}
|
||||
|
@ -3225,9 +3239,6 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
|||
const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
|
||||
g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get());
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(scaled_bounds);
|
||||
|
||||
struct VRAMFillUBOData
|
||||
{
|
||||
u32 u_dst_x;
|
||||
|
@ -3247,7 +3258,10 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
|||
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
|
||||
uniforms.u_interlaced_displayed_field = active_line_lsb;
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds, m_vram_texture->GetSizeVec());
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
|
@ -3357,14 +3371,15 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
|||
{
|
||||
DeactivateROV();
|
||||
|
||||
std::unique_ptr<GPUTexture> upload_texture;
|
||||
GPUDevice::AutoRecycleTexture upload_texture;
|
||||
u32 map_index;
|
||||
|
||||
if (!g_gpu_device->GetFeatures().supports_texture_buffers)
|
||||
{
|
||||
map_index = 0;
|
||||
upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
|
||||
GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch);
|
||||
upload_texture =
|
||||
g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U,
|
||||
GPUTexture::Flags::None, data, data_pitch);
|
||||
if (!upload_texture)
|
||||
{
|
||||
ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
|
||||
|
@ -3406,21 +3421,17 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
|||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
|
||||
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
|
||||
if (upload_texture)
|
||||
{
|
||||
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
g_gpu_device->RecycleTexture(std::move(upload_texture));
|
||||
}
|
||||
else
|
||||
{
|
||||
g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
}
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds, m_vram_texture->GetSizeVec());
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
|
@ -3492,12 +3503,13 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
|
|||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// VRAM read texture should already be bound.
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
|
||||
g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(dst_bounds_scaled);
|
||||
DrawScreenQuad(dst_bounds_scaled, m_vram_texture->GetSizeVec());
|
||||
RestoreDeviceContext();
|
||||
|
||||
if (check_mask && !m_pgxp_depth_buffer)
|
||||
|
|
|
@ -223,6 +223,7 @@ private:
|
|||
bool ShouldCheckForTexPageOverlap() const;
|
||||
|
||||
bool IsFlushed() const;
|
||||
|
||||
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
|
||||
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
|
||||
void PrepareDraw(const GPUBackendDrawCommand* cmd);
|
||||
|
@ -380,4 +381,5 @@ private:
|
|||
|
||||
// common shaders
|
||||
std::unique_ptr<GPUShader> m_fullscreen_quad_vertex_shader;
|
||||
std::unique_ptr<GPUShader> m_screen_quad_vertex_shader;
|
||||
};
|
||||
|
|
|
@ -50,6 +50,27 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) const
|
|||
false);
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateScreenVertexShader() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false);
|
||||
ss << R"(
|
||||
{
|
||||
// Depth set to 1 for PGXP depth buffer.
|
||||
v_pos = float4(a_pos, 1.0f, 1.0f);
|
||||
v_tex0 = a_tex0;
|
||||
|
||||
// NDC space Y flip in Vulkan.
|
||||
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
|
||||
v_pos.y = -v_pos.y;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading,
|
||||
bool textured, bool palette, bool page_texture, bool uv_limits,
|
||||
bool force_round_texcoords, bool pgxp_depth,
|
||||
|
|
|
@ -13,6 +13,8 @@ public:
|
|||
GPU_HW_ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch);
|
||||
~GPU_HW_ShaderGen();
|
||||
|
||||
std::string GenerateScreenVertexShader() const;
|
||||
|
||||
std::string GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading, bool textured, bool palette,
|
||||
bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth,
|
||||
bool disable_color_perspective) const;
|
||||
|
|
Loading…
Reference in New Issue