GPU: Support emulating a depth buffer from PGXP depth values

This commit is contained in:
Connor McLaughlin 2020-12-23 01:10:49 +10:00
parent f393ea618e
commit aa1543271e
13 changed files with 242 additions and 47 deletions

View File

@ -75,6 +75,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
m_texture_filtering = GPUTextureFilter::Nearest;
}
// Use the same predicate as UpdateHWSettings(): the depth buffer only applies while PGXP itself is enabled,
// otherwise the cached flag disagrees with the settings check from the very first update.
m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer();
PrintSettingsToLog();
return true;
}
@ -123,7 +124,8 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading ||
m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter ||
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing);
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing ||
m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());
if (m_resolution_scale != resolution_scale)
{
@ -161,6 +163,14 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
if (!m_supports_dual_source_blend && TextureFilterRequiresDualSourceBlend(m_texture_filtering))
m_texture_filtering = GPUTextureFilter::Nearest;
if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer())
{
m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer();
m_batch.use_depth_buffer = false;
if (m_pgxp_depth_buffer)
ClearDepthBuffer();
}
PrintSettingsToLog();
}
@ -202,6 +212,7 @@ void GPU_HW::PrintSettingsToLog()
Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering));
Log_InfoPrintf("Dual-source blending: %s", m_supports_dual_source_blend ? "Supported" : "Not supported");
Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO");
Log_InfoPrintf("Depth buffer: %s", m_pgxp_depth_buffer ? "YES" : "NO");
}
void GPU_HW::UpdateVRAMReadTexture()
@ -320,6 +331,44 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
vertices[i].SetUVLimits(min_u, max_u, min_v, max_v);
}
// Selects whether the batch currently being built uses the PGXP depth test.
// Does nothing when the requested state is already active. Otherwise any queued vertices are
// flushed first, and the last-seen depth value is reset to 1.0.
void GPU_HW::SetBatchDepthBuffer(bool enabled)
{
  const bool state_changes = (m_batch.use_depth_buffer != enabled);
  if (state_changes)
  {
    // Vertices already in the batch were queued under the previous depth-test setting.
    const bool has_pending_vertices = (GetBatchVertexCount() > 0);
    if (has_pending_vertices)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
    }

    m_batch.use_depth_buffer = enabled;
    m_last_depth_z = 1.0f;
  }
}
// Clears the depth buffer when a polygon's depth jumps far enough behind the previous polygon.
// vertices:     the triangle or quad about to be queued; the w component of each vertex is read.
// num_vertices: 3 for a triangle, 4 for a quad.
// The mean w (clamped to at most 1.0) is compared against the previously recorded value; if it is
// greater by at least g_settings.gpu_pgxp_depth_clear_threshold, pending vertices are flushed and the
// depth buffer is cleared. The mean is then stored as the new "last depth".
void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
{
  DebugAssert(num_vertices == 3 || num_vertices == 4);

  // Accumulate in the same left-to-right order for both primitive sizes.
  float w_total = vertices[0].w + vertices[1].w + vertices[2].w;
  float vertex_divisor = 3.0f;
  if (num_vertices != 3)
  {
    w_total += vertices[3].w;
    vertex_divisor = 4.0f;
  }
  const float average_z = std::min(w_total / vertex_divisor, 1.0f);

  const float depth_increase = average_z - m_last_depth_z;
  if (depth_increase >= g_settings.gpu_pgxp_depth_clear_threshold)
  {
    const bool has_pending_vertices = (GetBatchVertexCount() > 0);
    if (has_pending_vertices)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
    }

    ClearDepthBuffer();
  }

  // Recorded after any clear, so the clear's own reset of this value is overwritten.
  m_last_depth_z = average_z;
}
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
const float dx = x1 - x0;
@ -453,11 +502,20 @@ void GPU_HW::LoadVertices()
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (pgxp)
{
if (!valid_w)
{
SetBatchDepthBuffer(false);
for (BatchVertex& v : vertices)
v.w = 1.0f;
}
else if (g_settings.gpu_pgxp_depth_buffer)
{
SetBatchDepthBuffer(true);
CheckForDepthClear(vertices.data(), num_vertices);
}
}
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
@ -580,12 +638,13 @@ void GPU_HW::LoadVertices()
break;
}
// we can split the rectangle up into potentially 8 quads
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
if (!IsDrawingAreaIsValid())
return;
// we can split the rectangle up into potentially 8 quads
SetBatchDepthBuffer(false);
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
for (s32 y_offset = 0; y_offset < rectangle_height;)
@ -634,6 +693,8 @@ void GPU_HW::LoadVertices()
case GPUPrimitive::Line:
{
SetBatchDepthBuffer(false);
if (!rc.polyline)
{
DebugAssert(GetBatchVertexSpace() >= 2);
@ -760,6 +821,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
VRAMFillUBOData uniforms;
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
RGBA8ToFloat(color);
if (m_pgxp_depth_buffer)
uniforms.u_fill_color[3] = 1.0f;
uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
return uniforms;
}
@ -879,6 +944,9 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
void GPU_HW::ResetBatchVertexDepth()
{
if (m_pgxp_depth_buffer)
return;
Log_PerfPrint("Resetting batch vertex depth");
FlushRender();
UpdateDepthBufferFromMaskBit();
@ -1023,6 +1091,9 @@ void GPU_HW::FlushRender()
{
m_drawing_area_changed = false;
SetScissorFromDrawingArea();
if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f)
ClearDepthBuffer();
}
if (m_batch_ubo_dirty)

View File

@ -100,6 +100,7 @@ protected:
bool interlacing;
bool set_mask_while_drawing;
bool check_mask_before_draw;
bool use_depth_buffer;
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
@ -179,6 +180,7 @@ protected:
virtual void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void ClearDepthBuffer() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
@ -280,7 +282,10 @@ protected:
/// Computes polygon U/V boundaries.
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
static bool AreUVLimitsNeeded();
/// Sets the depth test flag for PGXP depth buffering.
void SetBatchDepthBuffer(bool enabled);
void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
@ -289,6 +294,7 @@ protected:
BatchVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
s32 m_current_depth = 0;
float m_last_depth_z = 1.0f;
u32 m_resolution_scale = 1;
u32 m_multisamples = 1;
@ -303,6 +309,7 @@ protected:
bool m_supports_per_sample_shading = false;
bool m_supports_dual_source_blend = false;
bool m_using_uv_limits = false;
bool m_pgxp_depth_buffer = false;
BatchConfig m_batch = {};
BatchUBOData m_batch_ubo_data = {};

View File

@ -235,9 +235,10 @@ void GPU_HW_D3D11::ClearFramebuffer()
{
static constexpr std::array<float, 4> color = {};
m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data());
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 0.0f, 0);
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, m_pgxp_depth_buffer ? 1.0f : 0.0f, 0);
m_context->ClearRenderTargetView(m_display_texture, color.data());
SetFullVRAMDirtyRectangle();
m_last_depth_z = 1.0f;
}
void GPU_HW_D3D11::DestroyFramebuffer()
@ -287,6 +288,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
rs_desc.CullMode = D3D11_CULL_NONE;
rs_desc.ScissorEnable = TRUE;
rs_desc.MultisampleEnable = IsUsingMultisampling();
rs_desc.DepthClipEnable = FALSE;
hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
@ -316,11 +318,16 @@ bool GPU_HW_D3D11::CreateStateObjects()
if (FAILED(hr))
return false;
ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
ds_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_greater_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
@ -377,6 +384,7 @@ void GPU_HW_D3D11::DestroyStateObjects()
m_point_sampler_state.Reset();
m_blend_no_color_writes_state.Reset();
m_blend_disabled_state.Reset();
m_depth_test_greater_state.Reset();
m_depth_test_less_state.Reset();
m_depth_test_always_state.Reset();
m_depth_disabled_state.Reset();
@ -392,7 +400,7 @@ bool GPU_HW_D3D11::CompileShaders()
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_supports_dual_source_blend);
m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3);
@ -622,8 +630,12 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
const GPUTransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode;
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);
m_context->OMSetDepthStencilState(
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
(m_batch.use_depth_buffer ?
m_depth_test_less_state.Get() :
(m_batch.check_mask_before_draw ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get())),
0);
m_context->Draw(num_vertices, base_vertex);
}
@ -798,7 +810,8 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
const VRAMWriteUBOData uniforms =
GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask);
m_context->OMSetDepthStencilState(check_mask ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->OMSetDepthStencilState(
(check_mask && !m_batch.use_depth_buffer) ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
// the viewport should already be set to the full vram, so just adjust the scissor
@ -825,13 +838,15 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight());
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->OMSetDepthStencilState((m_GPUSTAT.check_mask_before_draw && !m_batch.use_depth_buffer) ?
m_depth_test_greater_state.Get() :
m_depth_test_always_state.Get(),
0);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw)
if (m_GPUSTAT.check_mask_before_draw && !m_batch.use_depth_buffer)
m_current_depth++;
return;
@ -877,6 +892,9 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture()
void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
{
if (m_pgxp_depth_buffer)
return;
SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get());
@ -890,6 +908,14 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState();
}
// Clears the depth component of the VRAM depth view to 1.0 and resets the last-seen depth to match.
// Only expected to be called while the PGXP depth buffer is enabled (checked in debug builds).
void GPU_HW_D3D11::ClearDepthBuffer()
{
  DebugAssert(m_pgxp_depth_buffer);

  constexpr float cleared_depth = 1.0f;
  const auto depth_view = m_vram_depth_view.Get();
  m_context->ClearDepthStencilView(depth_view, D3D11_CLEAR_DEPTH, cleared_depth, 0);
  m_last_depth_z = cleared_depth;
}
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
{
return std::make_unique<GPU_HW_D3D11>();

View File

@ -35,6 +35,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -94,6 +95,7 @@ private:
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_less_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_greater_state;
ComPtr<ID3D11BlendState> m_blend_disabled_state;
ComPtr<ID3D11BlendState> m_blend_no_color_writes_state;

View File

@ -124,6 +124,7 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState()
m_uniform_stream_buffer->Bind();
m_vram_read_texture.Bind();
SetBlendMode();
m_current_depth_test = 0;
SetDepthFunc();
SetScissorFromDrawingArea();
m_batch_ubo_dirty = true;
@ -311,11 +312,14 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
void GPU_HW_OpenGL::ClearFramebuffer()
{
const float depth_clear_value = m_pgxp_depth_buffer ? 1.0f : 0.0f;
glDisable(GL_SCISSOR_TEST);
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
IsGLES() ? glClearDepthf(0.0f) : glClearDepth(0.0f);
IsGLES() ? glClearDepthf(depth_clear_value) : glClearDepth(depth_clear_value);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glEnable(GL_SCISSOR_TEST);
m_last_depth_z = 1.0f;
SetFullVRAMDirtyRectangle();
}
@ -386,7 +390,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_supports_dual_source_blend);
m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5;
@ -592,11 +596,7 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
SetBlendMode();
}
if (m_current_check_mask_before_draw != m_batch.check_mask_before_draw)
{
m_current_check_mask_before_draw = m_batch.check_mask_before_draw;
SetDepthFunc();
}
glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices);
}
@ -620,7 +620,16 @@ void GPU_HW_OpenGL::SetBlendMode()
void GPU_HW_OpenGL::SetDepthFunc()
{
glDepthFunc(m_current_check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
SetDepthFunc(m_batch.use_depth_buffer ? GL_LEQUAL : (m_batch.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS));
}
// Applies the given depth comparison function, skipping the GL call when it matches the cached value.
void GPU_HW_OpenGL::SetDepthFunc(GLenum func)
{
  const bool needs_update = (m_current_depth_test != func);
  if (needs_update)
  {
    glDepthFunc(func);
    m_current_depth_test = func;
  }
}
void GPU_HW_OpenGL::SetScissorFromDrawingArea()
@ -830,7 +839,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
m_vram_interlaced_fill_program.Bind();
UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_BLEND);
glDepthFunc(GL_ALWAYS);
SetDepthFunc(GL_ALWAYS);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
@ -852,7 +861,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
m_texture_stream_buffer->Unbind();
glDisable(GL_BLEND);
glDepthFunc(check_mask ? GL_GEQUAL : GL_ALWAYS);
SetDepthFunc((check_mask && !m_pgxp_depth_buffer) ? GL_GEQUAL : GL_ALWAYS);
m_vram_write_program.Bind();
if (m_use_ssbo_for_vram_writes)
@ -961,7 +970,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
SetDepthFunc((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? GL_GEQUAL : GL_ALWAYS);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
glViewport(dst_bounds_scaled.left,
@ -1056,6 +1065,9 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
{
if (m_pgxp_depth_buffer)
return;
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
@ -1073,6 +1085,15 @@ void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
m_vram_read_texture.Bind();
}
// Clears the depth buffer of the currently bound framebuffer to 1.0 and resets the last-seen depth.
void GPU_HW_OpenGL::ClearDepthBuffer()
{
  constexpr float cleared_depth = 1.0f;

  // The scissor test is switched off around the clear and re-enabled afterwards.
  glDisable(GL_SCISSOR_TEST);
  if (IsGLES())
    glClearDepthf(cleared_depth);
  else
    glClearDepth(cleared_depth);
  glClear(GL_DEPTH_BUFFER_BIT);
  glEnable(GL_SCISSOR_TEST);

  m_last_depth_z = cleared_depth;
}
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()
{
return std::make_unique<GPU_HW_OpenGL>();

View File

@ -31,6 +31,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -63,6 +64,7 @@ private:
bool CompilePrograms();
void SetDepthFunc();
void SetDepthFunc(GLenum func);
void SetBlendMode();
// downsample texture - used for readbacks at >1xIR.
@ -98,7 +100,7 @@ private:
bool m_supports_geometry_shaders = false;
bool m_use_ssbo_for_vram_writes = false;
bool m_current_check_mask_before_draw = false;
GLenum m_current_depth_test = 0;
GPUTransparencyMode m_current_transparency_mode = GPUTransparencyMode::Disabled;
BatchRenderMode m_current_render_mode = BatchRenderMode::TransparencyDisabled;
};

View File

@ -5,10 +5,12 @@
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples,
bool per_sample_shading, bool true_color, bool scaled_dithering,
GPUTextureFilter texture_filtering, bool uv_limits, bool supports_dual_source_blend)
GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth,
bool supports_dual_source_blend)
: ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale),
m_multisamples(multisamples), m_true_color(true_color), m_per_sample_shading(per_sample_shading),
m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits)
m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits),
m_pgxp_depth(pgxp_depth)
{
}
@ -84,6 +86,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
WriteHeader(ss);
DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss);
@ -135,8 +138,15 @@ CONSTANT float TEX_EPSILON = 0.00001;
// 0..+1023 -> -1..1
float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0;
float pos_y = ((a_pos.y + vertex_offset) / -256.0) + 1.0;
#if PGXP_DEPTH
// Ignore mask Z when using PGXP depth.
float pos_z = a_pos.w;
float pos_w = a_pos.w;
#else
float pos_z = a_pos.z;
float pos_w = a_pos.w;
#endif
#if API_OPENGL || API_OPENGL_ES
pos_y += POS_EPSILON;
@ -689,6 +699,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss);
@ -800,17 +811,18 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
{
DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}},
true, use_dual_source ? 2 : 1, true, UsingMSAA(), UsingPerSampleShading());
true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading());
}
else
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true,
UsingMSAA(), UsingPerSampleShading());
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1,
!m_pgxp_depth, UsingMSAA(), UsingPerSampleShading());
}
}
else
{
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, true, UsingMSAA(), UsingPerSampleShading());
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(),
UsingPerSampleShading());
}
ss << R"(
@ -939,7 +951,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
o_col0 = float4(color, u_dst_alpha_factor / ialpha);
#endif
#if !PGXP_DEPTH
o_depth = oalpha * v_pos.z;
#endif
}
else
{
@ -962,7 +976,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
#endif
#endif
#if !PGXP_DEPTH
o_depth = oalpha * v_pos.z;
#endif
}
#else
// Non-transparency won't enable blending so we can write the mask here regardless.
@ -972,7 +988,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif
#if !PGXP_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#endif
}
)";
@ -1196,6 +1214,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
std::stringstream ss;
WriteHeader(ss);
WriteCommonFunctions(ss);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
DeclareUniformBuffer(ss,
{"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset",
"uint u_mask_or_bits", "float u_depth_value"},
@ -1243,7 +1262,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
o_col0 = RGBA5551ToRGBA8(value);
#if !PGXP_DEPTH
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
#else
o_depth = 1.0;
#endif
})";
return ss.str();
@ -1257,6 +1280,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
std::stringstream ss;
WriteHeader(ss);
WriteCommonFunctions(ss);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
DeclareUniformBuffer(ss,
{"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size",
"bool u_set_mask_bit", "float u_depth_value"},
@ -1291,7 +1315,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0);
#endif
o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a);
#if !PGXP_DEPTH
o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0));
#else
o_depth = 1.0f;
#endif
})";
return ss.str();

View File

@ -7,7 +7,7 @@ class GPU_HW_ShaderGen : public ShaderGen
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading,
bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits,
bool supports_dual_source_blend);
bool pgxp_depth, bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
@ -36,4 +36,5 @@ private:
bool m_scaled_dithering;
GPUTextureFilter m_texture_filter;
bool m_uv_limits;
bool m_pgxp_depth;
};

View File

@ -506,8 +506,8 @@ void GPU_HW_Vulkan::ClearFramebuffer()
m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
static constexpr VkClearColorValue cc = {};
const VkClearDepthStencilValue cds = {m_pgxp_depth_buffer ? 1.0f : 0.0f};
static constexpr VkImageSubresourceRange csrr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u};
static constexpr VkClearDepthStencilValue cds = {};
static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u};
vkCmdClearColorImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), &cc, 1u, &csrr);
vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u,
@ -515,6 +515,7 @@ void GPU_HW_Vulkan::ClearFramebuffer()
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
m_last_depth_z = 1.0f;
SetFullVRAMDirtyRectangle();
}
@ -597,7 +598,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_supports_dual_source_blend);
m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3);
@ -659,7 +660,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
Vulkan::GraphicsPipelineBuilder gpbuilder;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
for (u8 depth_test = 0; depth_test < 2; depth_test++)
for (u8 depth_test = 0; depth_test < 3; depth_test++)
{
for (u8 render_mode = 0; render_mode < 4; render_mode++)
{
@ -671,6 +672,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
static constexpr std::array<VkCompareOp, 3> depth_test_values = {
VK_COMPARE_OP_ALWAYS, VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS_OR_EQUAL};
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
@ -692,8 +695,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]);
gpbuilder.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
gpbuilder.SetDepthState(true, true,
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetDepthState(true, true, depth_test_values[depth_test]);
gpbuilder.SetNoBlendingState();
gpbuilder.SetMultisamples(m_multisamples, m_per_sample_shading);
@ -935,11 +937,11 @@ void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
// [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
// Only three depth-test pipeline variants are compiled (0 = always, 1 = mask-bit test, 2 = PGXP depth).
// OR-ing the two flags would produce index 3 when both are set, so PGXP depth takes priority instead.
const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw);
VkPipeline pipeline =
m_batch_pipelines[BoolToUInt8(m_batch.check_mask_before_draw)][static_cast<u8>(render_mode)]
[static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(m_batch.transparency_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
m_batch_pipelines[depth_test][static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(
m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
vkCmdDraw(cmdbuf, num_vertices, 1, base_vertex, 0);
@ -1171,7 +1173,8 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, set_mask, check_mask);
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipelines[BoolToUInt8(check_mask)]);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1,
&m_vram_write_descriptor_set, 0, nullptr);
@ -1201,7 +1204,7 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]);
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_copy_descriptor_set, 0, nullptr);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
@ -1283,6 +1286,9 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture()
void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
{
if (m_pgxp_depth_buffer)
return;
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
@ -1304,6 +1310,22 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState();
}
void GPU_HW_Vulkan::ClearDepthBuffer()
{
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
static const VkClearDepthStencilValue cds = {1.0f};
static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u};
vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u,
&dsrr);
m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL);
m_last_depth_z = 1.0f;
}
std::unique_ptr<GPU> GPU::CreateHardwareVulkanRenderer()
{
return std::make_unique<GPU_HW_Vulkan>();

View File

@ -30,6 +30,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -107,7 +108,7 @@ private:
VkBufferView m_texture_stream_buffer_view = VK_NULL_HANDLE;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 2> m_batch_pipelines{};
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};
// [interlaced]
std::array<VkPipeline, 2> m_vram_fill_pipelines{};

View File

@ -487,6 +487,8 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetBoolValue("GPU", "PGXPCPU", false);
si.SetBoolValue("GPU", "PGXPPreserveProjFP", false);
si.SetFloatValue("GPU", "PGXPTolerance", -1.0f);
si.SetBoolValue("GPU", "PGXPDepthBuffer", false);
si.SetFloatValue("GPU", "PGXPDepthClearThreshold", Settings::DEFAULT_GPU_PGXP_DEPTH_THRESHOLD);
si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE));
si.SetIntValue("Display", "ActiveStartOffset", 0);
@ -694,6 +696,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_settings.display_crop_mode != old_settings.display_crop_mode ||
g_settings.display_aspect_ratio != old_settings.display_aspect_ratio ||
g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable ||
g_settings.gpu_pgxp_depth_buffer != old_settings.gpu_pgxp_depth_buffer ||
g_settings.display_active_start_offset != old_settings.display_active_start_offset ||
g_settings.display_active_end_offset != old_settings.display_active_end_offset ||
g_settings.display_line_start_offset != old_settings.display_line_start_offset ||

View File

@ -160,6 +160,8 @@ void Settings::Load(SettingsInterface& si)
gpu_pgxp_cpu = si.GetBoolValue("GPU", "PGXPCPU", false);
gpu_pgxp_preserve_proj_fp = si.GetBoolValue("GPU", "PGXPPreserveProjFP", false);
gpu_pgxp_tolerance = si.GetFloatValue("GPU", "PGXPTolerance", -1.0f);
gpu_pgxp_depth_buffer = si.GetBoolValue("GPU", "PGXPDepthBuffer", false);
SetPGXPDepthClearThreshold(si.GetFloatValue("GPU", "PGXPDepthClearThreshold", DEFAULT_GPU_PGXP_DEPTH_THRESHOLD));
display_crop_mode =
ParseDisplayCropMode(
@ -298,6 +300,8 @@ void Settings::Save(SettingsInterface& si) const
si.SetBoolValue("GPU", "PGXPCPU", gpu_pgxp_cpu);
si.SetBoolValue("GPU", "PGXPPreserveProjFP", gpu_pgxp_preserve_proj_fp);
si.SetFloatValue("GPU", "PGXPTolerance", gpu_pgxp_tolerance);
si.SetBoolValue("GPU", "PGXPDepthBuffer", gpu_pgxp_depth_buffer);
si.SetFloatValue("GPU", "PGXPDepthClearThreshold", GetPGXPDepthClearThreshold());
si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode));
si.SetIntValue("Display", "ActiveStartOffset", display_active_start_offset);

View File

@ -111,6 +111,7 @@ struct Settings
bool gpu_pgxp_vertex_cache = false;
bool gpu_pgxp_cpu = false;
bool gpu_pgxp_preserve_proj_fp = false;
bool gpu_pgxp_depth_buffer = false;
DisplayCropMode display_crop_mode = DisplayCropMode::None;
DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::Auto;
s16 display_active_start_offset = 0;
@ -130,6 +131,7 @@ struct Settings
bool video_sync_enabled = true;
float display_max_fps = 0.0f;
float gpu_pgxp_tolerance = -1.0f;
float gpu_pgxp_depth_clear_threshold = 300.0f / 4096.0f;
bool cdrom_read_thread = true;
bool cdrom_region_check = true;
@ -194,6 +196,10 @@ struct Settings
return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled;
}
/// Returns true only when both PGXP and the PGXP depth buffer option are enabled.
ALWAYS_INLINE bool UsingPGXPDepthBuffer() const
{
  return gpu_pgxp_depth_buffer && gpu_pgxp_enable;
}
/// Returns the depth clear threshold in its external form: the stored value multiplied by 4096.
ALWAYS_INLINE float GetPGXPDepthClearThreshold() const
{
  const float stored_threshold = gpu_pgxp_depth_clear_threshold;
  return stored_threshold * 4096.0f;
}
/// Stores the depth clear threshold; the given value is divided by 4096 before being kept.
ALWAYS_INLINE void SetPGXPDepthClearThreshold(float value)
{
  const float stored_threshold = value / 4096.0f;
  gpu_pgxp_depth_clear_threshold = stored_threshold;
}
ALWAYS_INLINE bool IsUsingFastmem() const
{
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler &&
@ -281,6 +287,7 @@ struct Settings
#endif
static constexpr GPUTextureFilter DEFAULT_GPU_TEXTURE_FILTER = GPUTextureFilter::Nearest;
static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto;
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
#ifdef WITH_RECOMPILER
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;