GPU/HW: Implement oversized VRAM fills in hardware

Fixes downscaling in Bugs and Taz PAL.
This commit is contained in:
Connor McLaughlin 2021-07-21 19:22:04 +10:00
parent 7ea78ad2df
commit 70209db402
12 changed files with 146 additions and 140 deletions

View File

@ -976,6 +976,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)); color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color));
VRAMFillUBOData uniforms; VRAMFillUBOData uniforms;
uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale;
uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale;
uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale;
uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale;
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
RGBA8ToFloat(color); RGBA8ToFloat(color);

View File

@ -125,6 +125,10 @@ protected:
struct VRAMFillUBOData struct VRAMFillUBOData
{ {
u32 u_dst_x;
u32 u_dst_y;
u32 u_end_x;
u32 u_end_y;
float u_fill_color[4]; float u_fill_color[4];
u32 u_interlaced_displayed_field; u32 u_interlaced_displayed_field;
}; };
@ -268,13 +272,19 @@ protected:
/// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled /// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
/// on a per-pixel basis, and the opaque pixels shouldn't be blended at all. /// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const ALWAYS_INLINE bool NeedsTwoPassRendering() const
{ {
return (m_batch.texture_mode != GPUTextureMode::Disabled && return (m_batch.texture_mode != GPUTextureMode::Disabled &&
(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground || (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
(!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
} }
/// Reports whether a VRAM fill rectangle extends beyond the VRAM dimensions.
/// A fill counts as oversized when its right edge (x + width) exceeds VRAM_WIDTH
/// or its bottom edge (y + height) exceeds VRAM_HEIGHT.
ALWAYS_INLINE static bool IsVRAMFillOversized(u32 x, u32 y, u32 width, u32 height)
{
  const bool exceeds_right_edge = (x + width) > VRAM_WIDTH;
  const bool exceeds_bottom_edge = (y + height) > VRAM_HEIGHT;
  return (exceeds_right_edge || exceeds_bottom_edge);
}
ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast<bool>(m_sw_renderer); } ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast<bool>(m_sw_renderer); }
void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillBackendCommandParameters(GPUBackendCommand* cmd) const;

View File

@ -508,7 +508,8 @@ bool GPU_HW_D3D11::CompileShaders()
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend); m_pgxp_depth_buffer, m_supports_dual_source_blend);
ShaderCompileProgressTracker progress("Compiling Shaders", 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1); ShaderCompileProgressTracker progress("Compiling Shaders",
1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1);
// input layout // input layout
{ {
@ -585,18 +586,19 @@ bool GPU_HW_D3D11::CompileShaders()
progress.Increment(); progress.Increment();
m_vram_fill_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFillFragmentShader()); for (u8 wrapped = 0; wrapped < 2; wrapped++)
if (!m_vram_fill_pixel_shader) {
return false; for (u8 interlaced = 0; interlaced < 2; interlaced++)
{
const std::string ps =
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced));
m_vram_fill_pixel_shaders[wrapped][interlaced] = shader_cache.GetPixelShader(m_device.Get(), ps);
if (!m_vram_fill_pixel_shaders[wrapped][interlaced])
return false;
progress.Increment(); progress.Increment();
}
m_vram_interlaced_fill_pixel_shader = }
shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateInterlacedFillFragmentShader());
if (!m_vram_interlaced_fill_pixel_shader)
return false;
progress.Increment();
m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader()); m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader());
if (!m_vram_read_pixel_shader) if (!m_vram_read_pixel_shader)
@ -682,8 +684,7 @@ void GPU_HW_D3D11::DestroyShaders()
m_vram_copy_pixel_shader.Reset(); m_vram_copy_pixel_shader.Reset();
m_vram_write_pixel_shader.Reset(); m_vram_write_pixel_shader.Reset();
m_vram_read_pixel_shader.Reset(); m_vram_read_pixel_shader.Reset();
m_vram_interlaced_fill_pixel_shader.Reset(); m_vram_fill_pixel_shaders = {};
m_vram_fill_pixel_shader.Reset();
m_copy_pixel_shader.Reset(); m_copy_pixel_shader.Reset();
m_uv_quad_vertex_shader.Reset(); m_uv_quad_vertex_shader.Reset();
m_screen_quad_vertex_shader.Reset(); m_screen_quad_vertex_shader.Reset();
@ -976,26 +977,18 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
if (IsUsingSoftwareRendererForReadbacks()) if (IsUsingSoftwareRendererForReadbacks())
FillSoftwareRendererVRAM(x, y, width, height, color); FillSoftwareRendererVRAM(x, y, width, height, color);
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
height * m_resolution_scale); SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() : bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
m_vram_fill_pixel_shader.Get(),
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
DrawUtilityShader(m_vram_fill_pixel_shaders[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
[BoolToUInt8(IsInterlacedRenderingEnabled())]
.Get(),
&uniforms, sizeof(uniforms)); &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();

View File

@ -123,8 +123,7 @@ private:
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader; ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader; ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader;
ComPtr<ID3D11PixelShader> m_copy_pixel_shader; ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader; std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_vram_fill_pixel_shaders; // [wrapped][interlaced]
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader; ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader; ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader; ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;

View File

@ -420,8 +420,8 @@ bool GPU_HW_D3D12::CompilePipelines()
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend); m_pgxp_depth_buffer, m_supports_dual_source_blend);
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 +
2 + 2 + 1 + 1 + (2 * 3) + 1); (2 * 2) + 2 + 2 + 1 + 1 + (2 * 3) + 1);
// vertex shaders - [textured] // vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing] // fragment shaders - [render_mode][texture_mode][dithering][interlacing]
@ -561,23 +561,24 @@ bool GPU_HW_D3D12::CompilePipelines()
gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat()); gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat());
// VRAM fill // VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
{ {
for (u8 interlaced = 0; interlaced < 2; interlaced++) for (u8 interlaced = 0; interlaced < 2; interlaced++)
{ {
ComPtr<ID3DBlob> fs = shader_cache.GetPixelShader( ComPtr<ID3DBlob> fs = shader_cache.GetPixelShader(
(interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader()); shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
if (!fs) if (!fs)
return false; return false;
gpbuilder.SetPixelShader(fs.Get()); gpbuilder.SetPixelShader(fs.Get());
gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS); gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS);
m_vram_fill_pipelines[interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false);
if (!m_vram_fill_pipelines[interlaced]) if (!m_vram_fill_pipelines[wrapped][interlaced])
return false; return false;
D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[interlaced].Get(), "VRAM Fill Pipeline Interlacing=%u", D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[wrapped][interlaced].Get(),
interlaced); "VRAM Fill Pipeline Wrapped=%u,Interlacing=%u", wrapped, interlaced);
progress.Increment(); progress.Increment();
} }
@ -994,31 +995,22 @@ void GPU_HW_D3D12::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
FillSoftwareRendererVRAM(x, y, width, height, color); FillSoftwareRendererVRAM(x, y, width, height, color);
// TODO: Use fast clear // TODO: Use fast clear
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color);
x *= m_resolution_scale;
y *= m_resolution_scale;
width *= m_resolution_scale;
height *= m_resolution_scale;
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get());
cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0);
cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor()); cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor());
cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())].Get()); cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
D3D12::SetViewportAndScissor(cmdlist, x, y, width, height); [BoolToUInt8(IsInterlacedRenderingEnabled())]
.Get());
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
D3D12::SetViewportAndScissor(cmdlist, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
cmdlist->DrawInstanced(3, 1, 0, 0); cmdlist->DrawInstanced(3, 1, 0, 0);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();

View File

@ -92,8 +92,8 @@ private:
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2, 5, 9, 4, 2> m_batch_pipelines; DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2, 5, 9, 4, 2> m_batch_pipelines;
// [interlaced] // [wrapped][interlaced]
std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_fill_pipelines; DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2> m_vram_fill_pipelines;
// [depth_test] // [depth_test]
std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_write_pipelines; std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_write_pipelines;

View File

@ -517,7 +517,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend); m_pgxp_depth_buffer, m_supports_dual_source_blend);
ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + 1 + 1 + 1 + 1 + 1 + 1); ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + (2 * 2) + 1 + 1 + 1 + 1 + 1);
for (u32 render_mode = 0; render_mode < 4; render_mode++) for (u32 render_mode = 0; render_mode < 4; render_mode++)
{ {
@ -609,22 +609,29 @@ bool GPU_HW_OpenGL::CompilePrograms()
} }
} }
std::optional<GL::Program> prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, for (u8 wrapped = 0; wrapped < 2; wrapped++)
shadergen.GenerateInterlacedFillFragmentShader(), {
[this, use_binding_layout](GL::Program& prog) { for (u8 interlaced = 0; interlaced < 2; interlaced++)
if (!IsGLES() && !use_binding_layout) {
prog.BindFragData(0, "o_col0"); std::optional<GL::Program> prog = shader_cache.GetProgram(
}); shadergen.GenerateScreenQuadVertexShader(), {},
if (!prog) shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)),
return false; [this, use_binding_layout](GL::Program& prog) {
if (!IsGLES() && !use_binding_layout)
prog.BindFragData(0, "o_col0");
});
if (!prog)
return false;
if (!use_binding_layout) if (!use_binding_layout)
prog->BindUniformBlock("UBOBlock", 1); prog->BindUniformBlock("UBOBlock", 1);
m_vram_interlaced_fill_program = std::move(*prog); m_vram_fill_programs[wrapped][interlaced] = std::move(*prog);
progress.Increment(); progress.Increment();
}
}
prog = std::optional<GL::Program> prog =
shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMReadFragmentShader(), shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMReadFragmentShader(),
[this, use_binding_layout](GL::Program& prog) { [this, use_binding_layout](GL::Program& prog) {
if (!IsGLES() && !use_binding_layout) if (!IsGLES() && !use_binding_layout)
@ -1014,28 +1021,17 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
if (IsUsingSoftwareRendererForReadbacks()) if (IsUsingSoftwareRendererForReadbacks())
FillSoftwareRendererVRAM(x, y, width, height, color); FillSoftwareRendererVRAM(x, y, width, height, color);
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color);
// scale coordinates const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
x *= m_resolution_scale; glScissor(bounds.left * m_resolution_scale,
y *= m_resolution_scale; m_vram_texture.GetHeight() - (bounds.top * m_resolution_scale) - (height * m_resolution_scale),
width *= m_resolution_scale; width * m_resolution_scale, height * m_resolution_scale);
height *= m_resolution_scale;
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
// fast path when not using interlaced rendering // fast path when not using interlaced rendering
if (!IsInterlacedRenderingEnabled()) const bool wrapped = IsVRAMFillOversized(x, y, width, height);
const bool interlaced = IsInterlacedRenderingEnabled();
if (!wrapped && !interlaced)
{ {
const auto [r, g, b, a] = const auto [r, g, b, a] =
RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
@ -1048,7 +1044,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
m_vram_interlaced_fill_program.Bind(); m_vram_fill_programs[BoolToUInt8(wrapped)][BoolToUInt8(interlaced)].Bind();
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_BLEND); glDisable(GL_BLEND);
SetDepthFunc(GL_ALWAYS); SetDepthFunc(GL_ALWAYS);

View File

@ -99,7 +99,7 @@ private:
std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4> std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
m_render_programs; // [render_mode][texture_mode][dithering][interlacing] m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced] std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_interlaced_fill_program; std::array<std::array<GL::Program, 2>, 2> m_vram_fill_programs;
GL::Program m_vram_read_program; GL::Program m_vram_read_program;
GL::Program m_vram_write_program; GL::Program m_vram_write_program;
GL::Program m_vram_copy_program; GL::Program m_vram_copy_program;

View File

@ -997,27 +997,6 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
return ss.str(); return ss.str();
} }
/// Generates the source of the fragment shader used for VRAM fills while interlaced rendering is active.
/// The emitted shader discards every fragment whose (Y-corrected) row parity equals
/// u_interlaced_displayed_field; all other fragments receive u_fill_color as the colour output and
/// the fill colour's alpha component as the depth output.
/// Returns the complete shader text accumulated in the string stream.
std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
{
std::stringstream ss;
// Common preamble shared by the generated shaders.
WriteHeader(ss);
WriteCommonFunctions(ss);
// Uniforms read by the shader body below: the fill colour and the field used for the parity test.
DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
// NOTE(review): the meaning of the positional arguments is defined by DeclareFragmentEntryPoint,
// which is not visible here; the body relies on v_pos, o_col0 and o_depth being declared by it.
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
ss << R"(
{
if ((fixYCoord(uint(v_pos.y)) & 1u) == u_interlaced_displayed_field)
discard;
o_col0 = u_fill_color;
o_depth = u_fill_color.a;
}
)";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit,
GPU_HW::InterlacedRenderMode interlace_mode, GPU_HW::InterlacedRenderMode interlace_mode,
bool smooth_chroma) bool smooth_chroma)
@ -1324,6 +1303,50 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
return ss.str(); return ss.str();
} }
/// Generates the source of the fragment shader used for VRAM fills.
///
/// wrapped:    value of the WRAPPED macro. When set, the shader discards a fragment if its x coordinate is
///             below u_dst_coords.x and at or above u_end_coords.x, or if the same holds for y.
/// interlaced: value of the INTERLACED macro. When set, the shader discards fragments whose (Y-corrected)
///             row parity equals u_interlaced_displayed_field.
///
/// Surviving fragments are written with u_fill_color. The depth output is the fill colour's alpha
/// component, unless PGXP_DEPTH (taken from m_pgxp_depth) is set, in which case it is 1.0.
/// Returns the complete shader text accumulated in the string stream.
std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced)
{
std::stringstream ss;
// Common preamble shared by the generated shaders.
WriteHeader(ss);
WriteCommonFunctions(ss);
// Compile-time switches consumed by the #if blocks in the shader body below.
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
DefineMacro(ss, "WRAPPED", wrapped);
DefineMacro(ss, "INTERLACED", interlaced);
// Uniform order follows the fields of VRAMFillUBOData (dst x/y, end x/y, fill colour, displayed field).
// NOTE(review): presumably the two layouts must stay in sync, since the backends upload that struct as-is.
DeclareUniformBuffer(
ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
// NOTE(review): the fifth argument presumably requests the v_pos input, which the body only reads when
// INTERLACED or WRAPPED is set -- confirm against DeclareFragmentEntryPoint.
DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, true, false, false, false);
ss << R"(
{
#if INTERLACED || WRAPPED
uint2 dst_coords = uint2(uint(v_pos.x), fixYCoord(uint(v_pos.y)));
#endif
#if INTERLACED
if ((dst_coords.y & 1u) == u_interlaced_displayed_field)
discard;
#endif
#if WRAPPED
// make sure it's not oversized and out of range
if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) ||
(dst_coords.y < u_dst_coords.y && dst_coords.y >= u_end_coords.y))
{
discard;
}
#endif
o_col0 = u_fill_color;
#if !PGXP_DEPTH
o_depth = u_fill_color.a;
#else
o_depth = 1.0f;
#endif
})";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader() std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;

View File

@ -13,12 +13,12 @@ public:
std::string GenerateBatchVertexShader(bool textured); std::string GenerateBatchVertexShader(bool textured);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode, std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
bool dithering, bool interlacing); bool dithering, bool interlacing);
std::string GenerateInterlacedFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode, std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
bool smooth_chroma); bool smooth_chroma);
std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo); std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
std::string GenerateVRAMCopyFragmentShader(); std::string GenerateVRAMCopyFragmentShader();
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced);
std::string GenerateVRAMUpdateDepthFragmentShader(); std::string GenerateVRAMUpdateDepthFragmentShader();
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);

View File

@ -840,7 +840,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
m_pgxp_depth_buffer, m_supports_dual_source_blend); m_pgxp_depth_buffer, m_supports_dual_source_blend);
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
2 + 2 + 1 + 1 + (2 * 3) + 1); (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1);
// vertex shaders - [textured] // vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing] // fragment shaders - [render_mode][texture_mode][dithering][interlacing]
@ -1010,11 +1010,12 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetMultisamples(m_multisamples, false); gpbuilder.SetMultisamples(m_multisamples, false);
// VRAM fill // VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
{ {
for (u8 interlaced = 0; interlaced < 2; interlaced++) for (u8 interlaced = 0; interlaced < 2; interlaced++)
{ {
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader( VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(
(interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader()); shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
if (fs == VK_NULL_HANDLE) if (fs == VK_NULL_HANDLE)
return false; return false;
@ -1022,9 +1023,9 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetFragmentShader(fs); gpbuilder.SetFragmentShader(fs);
gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS);
m_vram_fill_pipelines[interlaced] = gpbuilder.Create(device, pipeline_cache, false); m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(device, fs, nullptr); vkDestroyShaderModule(device, fs, nullptr);
if (m_vram_fill_pipelines[interlaced] == VK_NULL_HANDLE) if (m_vram_fill_pipelines[wrapped][interlaced] == VK_NULL_HANDLE)
return false; return false;
progress.Increment(); progress.Increment();
@ -1249,8 +1250,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
{ {
m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
for (VkPipeline& p : m_vram_fill_pipelines) m_vram_fill_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
Vulkan::Util::SafeDestroyPipeline(p);
for (VkPipeline& p : m_vram_write_pipelines) for (VkPipeline& p : m_vram_write_pipelines)
Vulkan::Util::SafeDestroyPipeline(p); Vulkan::Util::SafeDestroyPipeline(p);
@ -1482,23 +1482,8 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
if (IsUsingSoftwareRendererForReadbacks()) if (IsUsingSoftwareRendererForReadbacks())
FillSoftwareRendererVRAM(x, y, width, height, color); FillSoftwareRendererVRAM(x, y, width, height, color);
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color);
x *= m_resolution_scale;
y *= m_resolution_scale;
width *= m_resolution_scale;
height *= m_resolution_scale;
BeginVRAMRenderPass(); BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
@ -1506,8 +1491,12 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms); &uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]); m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height); [BoolToUInt8(IsInterlacedRenderingEnabled())]);
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
Vulkan::Util::SetViewportAndScissor(cmdbuf, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
vkCmdDraw(cmdbuf, 3, 1, 0, 0); vkCmdDraw(cmdbuf, 3, 1, 0, 0);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();

View File

@ -129,8 +129,8 @@ private:
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};
// [interlaced] // [wrapped][interlaced]
std::array<VkPipeline, 2> m_vram_fill_pipelines{}; DimensionalArray<VkPipeline, 2, 2> m_vram_fill_pipelines{};
// [depth_test] // [depth_test]
std::array<VkPipeline, 2> m_vram_write_pipelines{}; std::array<VkPipeline, 2> m_vram_write_pipelines{};