GPU/HW: Use sized tristrips instead of fullscreen quads
This commit is contained in:
parent
1063c3da7f
commit
ddffc055b9
|
@ -1057,6 +1057,15 @@ bool GPU_HW::CompileCommonShaders(Error* error)
|
|||
if (!m_fullscreen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_fullscreen_quad_vertex_shader, "Fullscreen Quad Vertex Shader");
|
||||
|
||||
m_screen_quad_vertex_shader = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
|
||||
shadergen.GenerateScreenVertexShader(), error);
|
||||
if (!m_screen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_screen_quad_vertex_shader, "Screen Quad Vertex Shader");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1537,13 +1546,21 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
|
||||
batch_shader_guard.Run();
|
||||
|
||||
static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = {
|
||||
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0,
|
||||
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)),
|
||||
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0,
|
||||
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)),
|
||||
};
|
||||
|
||||
// common state
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
plconfig.input_layout.vertex_attributes = screen_vertex_attributes;
|
||||
plconfig.input_layout.vertex_stride = sizeof(ScreenVertex);
|
||||
plconfig.primitive = GPUPipeline::Primitive::TriangleStrips;
|
||||
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.per_sample_shading = false;
|
||||
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
|
||||
|
||||
// VRAM fill
|
||||
|
@ -1631,8 +1648,6 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
}
|
||||
}
|
||||
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
|
||||
// VRAM write replacement
|
||||
{
|
||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
||||
|
@ -1641,6 +1656,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
@ -1649,6 +1665,11 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
return false;
|
||||
}
|
||||
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.primitive = GPUPipeline::Primitive::Triangles;
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
|
||||
// VRAM update depth
|
||||
if (m_write_mask_as_depth)
|
||||
{
|
||||
|
@ -1954,6 +1975,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
|
|||
|
||||
void GPU_HW::UpdateDepthBufferFromMaskBit()
|
||||
{
|
||||
GL_SCOPE_FMT("UpdateDepthBufferFromMaskBit()");
|
||||
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
|
||||
|
||||
// Viewport should already be set full, only need to fudge the scissor.
|
||||
|
@ -2997,9 +3019,10 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
|
|||
|
||||
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
|
||||
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
|
||||
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
|
||||
g_gpu_device->SetScissor(rect);
|
||||
DrawScreenQuad(rect);
|
||||
RestoreDeviceContext();
|
||||
return true;
|
||||
}
|
||||
|
@ -3196,6 +3219,30 @@ ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
|
|||
return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
|
||||
}
|
||||
|
||||
// Draws a textured rectangle as a four-vertex strip.
//   bounds:    rectangle in pixels; it is normalised against the size of m_vram_texture.
//   uv_bounds: texture coordinates paired with the matching corners of `bounds`.
// No batch may be pending when this is called (checked with IsFlushed()).
ALWAYS_INLINE_RELEASE void GPU_HW::DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds)
{
  static constexpr u32 NUM_VERTICES = 4;

  DebugAssert(IsFlushed());

  // Regroup the rectangle so the two horizontal edges sit in the low pair and the two
  // vertical edges in the high pair, allowing each axis to be scaled as one pair.
  const GSVector4 edges = GSVector4(bounds.xzyw());
  const GSVector2 target_size = GSVector2(m_vram_texture->GetSizeVec());

  // Horizontal: 2 * px / width - 1. Vertical: 1 - 2 * (px / height), i.e. the axis is inverted.
  const GSVector2 clip_x = ((edges.xy() * GSVector2::cxpr(2.0f)) / target_size.xx()) - GSVector2::cxpr(1.0f);
  const GSVector2 clip_y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (edges.zw() / target_size.yy()));

  // Interleave the two axes again so each half holds one (x, y) corner.
  const GSVector4 clip_rect = GSVector4::xyxy(clip_x, clip_y).xzyw();

  // One (x, y, u, v) tuple per corner; position and texcoord use the same swizzle so they stay paired.
  const GSVector4 corners[NUM_VERTICES] = {
    GSVector4::xyxy(clip_rect.xy(), uv_bounds.xy()),
    GSVector4::xyxy(clip_rect.zyzw().xy(), uv_bounds.zyzw().xy()),
    GSVector4::xyxy(clip_rect.xwzw().xy(), uv_bounds.xwzw().xy()),
    GSVector4::xyxy(clip_rect.zw(), uv_bounds.zw()),
  };

  ScreenVertex* mapped;
  u32 available; // filled in by MapVertexBuffer(), not needed here
  u32 first_vertex;
  g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), NUM_VERTICES, reinterpret_cast<void**>(&mapped), &available,
                                &first_vertex);

  for (u32 i = 0; i < NUM_VERTICES; i++)
    GSVector4::store<false>(&mapped[i], corners[i]);

  g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), NUM_VERTICES);
  g_gpu_device->Draw(NUM_VERTICES, first_vertex);
}
|
||||
|
||||
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb)
|
||||
{
|
||||
FlushRender();
|
||||
|
@ -3225,9 +3272,6 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
|||
const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
|
||||
g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get());
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(scaled_bounds);
|
||||
|
||||
struct VRAMFillUBOData
|
||||
{
|
||||
u32 u_dst_x;
|
||||
|
@ -3247,7 +3291,10 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
|||
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
|
||||
uniforms.u_interlaced_displayed_field = active_line_lsb;
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds);
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
|
@ -3357,14 +3404,15 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
|||
{
|
||||
DeactivateROV();
|
||||
|
||||
std::unique_ptr<GPUTexture> upload_texture;
|
||||
GPUDevice::AutoRecycleTexture upload_texture;
|
||||
u32 map_index;
|
||||
|
||||
if (!g_gpu_device->GetFeatures().supports_texture_buffers)
|
||||
{
|
||||
map_index = 0;
|
||||
upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
|
||||
GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch);
|
||||
upload_texture =
|
||||
g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U,
|
||||
GPUTexture::Flags::None, data, data_pitch);
|
||||
if (!upload_texture)
|
||||
{
|
||||
ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
|
||||
|
@ -3406,21 +3454,17 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
|||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
|
||||
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
|
||||
if (upload_texture)
|
||||
{
|
||||
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
g_gpu_device->RecycleTexture(std::move(upload_texture));
|
||||
}
|
||||
else
|
||||
{
|
||||
g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
}
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds);
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
|
@ -3492,12 +3536,13 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
|
|||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// VRAM read texture should already be bound.
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
|
||||
g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(dst_bounds_scaled);
|
||||
DrawScreenQuad(dst_bounds_scaled);
|
||||
RestoreDeviceContext();
|
||||
|
||||
if (check_mask && !m_pgxp_depth_buffer)
|
||||
|
|
|
@ -121,6 +121,14 @@ private:
|
|||
|
||||
static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));
|
||||
|
||||
// Vertex format for screen quads: a 2D position (x, y) followed by a 2D texture
// coordinate (u, v). The pipeline's vertex attributes are described using the offsets
// of `x` and `u`, and sizeof(ScreenVertex) is used as the vertex stride.
struct alignas(16) ScreenVertex
{
  float x;
  float y;
  float u;
  float v;
};

// DrawScreenQuad() fills each vertex with a single four-component vector store, so the
// struct has to remain exactly four tightly packed floats.
static_assert(sizeof(ScreenVertex) == sizeof(float) * 4, "ScreenVertex must be exactly four floats");
|
||||
|
||||
struct alignas(16) BatchVertex
|
||||
{
|
||||
float x;
|
||||
|
@ -223,6 +231,8 @@ private:
|
|||
bool ShouldCheckForTexPageOverlap() const;
|
||||
|
||||
bool IsFlushed() const;
|
||||
// Draws a four-vertex quad covering `bounds`, which is given in pixels and normalised against the
// VRAM texture size. `uv_bounds` holds the texture coordinates for the corners and defaults to
// (0, 0)-(1, 1). The definition asserts IsFlushed() before mapping the vertex buffer.
void DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
|
||||
|
||||
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
|
||||
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
|
||||
void PrepareDraw(const GPUBackendDrawCommand* cmd);
|
||||
|
@ -380,4 +390,5 @@ private:
|
|||
|
||||
// common shaders
|
||||
std::unique_ptr<GPUShader> m_fullscreen_quad_vertex_shader;
|
||||
std::unique_ptr<GPUShader> m_screen_quad_vertex_shader;
|
||||
};
|
||||
|
|
|
@ -50,6 +50,27 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) const
|
|||
false);
|
||||
}
|
||||
|
||||
// Builds and returns the source text of the screen quad vertex shader.
// The shader takes a float2 position (a_pos) and a float2 texture coordinate (a_tex0),
// outputs the position with z and w both set to 1.0, and passes the texture coordinate
// through unchanged. The output Y is negated when API_OPENGL, API_OPENGL_ES or API_VULKAN
// is defined.
std::string GPU_HW_ShaderGen::GenerateScreenVertexShader() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
// Two per-vertex inputs: position and texture coordinate. The meaning of the remaining
// arguments is defined by DeclareVertexEntryPoint(), which is not visible here.
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false);
|
||||
// Shader body follows as a raw string; everything up to the closing )" is emitted verbatim.
// NOTE(review): the embedded comment mentions only Vulkan, but the #if guarding the Y flip
// also covers OpenGL and OpenGL ES.
ss << R"(
|
||||
{
|
||||
// Depth set to 1 for PGXP depth buffer.
|
||||
v_pos = float4(a_pos, 1.0f, 1.0f);
|
||||
v_tex0 = a_tex0;
|
||||
|
||||
// NDC space Y flip in Vulkan.
|
||||
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
|
||||
v_pos.y = -v_pos.y;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading,
|
||||
bool textured, bool palette, bool page_texture, bool uv_limits,
|
||||
bool force_round_texcoords, bool pgxp_depth,
|
||||
|
|
|
@ -13,6 +13,8 @@ public:
|
|||
GPU_HW_ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch);
|
||||
~GPU_HW_ShaderGen();
|
||||
|
||||
// Returns the source of the vertex shader used for screen quads; it takes a float2 position
// and a float2 texture coordinate per vertex.
std::string GenerateScreenVertexShader() const;
|
||||
|
||||
std::string GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading, bool textured, bool palette,
|
||||
bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth,
|
||||
bool disable_color_perspective) const;
|
||||
|
|
Loading…
Reference in New Issue