GPU/HW: Implement oversized VRAM fills in hardware
Fixes downscaling in Bugs and Taz PAL.
This commit is contained in:
parent
7ea78ad2df
commit
70209db402
|
@ -976,6 +976,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
|
||||||
color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color));
|
color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color));
|
||||||
|
|
||||||
VRAMFillUBOData uniforms;
|
VRAMFillUBOData uniforms;
|
||||||
|
uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale;
|
||||||
|
uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale;
|
||||||
|
uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale;
|
||||||
|
uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale;
|
||||||
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
|
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
|
||||||
RGBA8ToFloat(color);
|
RGBA8ToFloat(color);
|
||||||
|
|
||||||
|
|
|
@ -125,6 +125,10 @@ protected:
|
||||||
|
|
||||||
struct VRAMFillUBOData
|
struct VRAMFillUBOData
|
||||||
{
|
{
|
||||||
|
u32 u_dst_x;
|
||||||
|
u32 u_dst_y;
|
||||||
|
u32 u_end_x;
|
||||||
|
u32 u_end_y;
|
||||||
float u_fill_color[4];
|
float u_fill_color[4];
|
||||||
u32 u_interlaced_displayed_field;
|
u32 u_interlaced_displayed_field;
|
||||||
};
|
};
|
||||||
|
@ -268,13 +272,19 @@ protected:
|
||||||
|
|
||||||
/// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
|
/// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
|
||||||
/// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
|
/// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
|
||||||
bool NeedsTwoPassRendering() const
|
ALWAYS_INLINE bool NeedsTwoPassRendering() const
|
||||||
{
|
{
|
||||||
return (m_batch.texture_mode != GPUTextureMode::Disabled &&
|
return (m_batch.texture_mode != GPUTextureMode::Disabled &&
|
||||||
(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
|
(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
|
||||||
(!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
|
(!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the specified VRAM fill is oversized.
|
||||||
|
ALWAYS_INLINE static bool IsVRAMFillOversized(u32 x, u32 y, u32 width, u32 height)
|
||||||
|
{
|
||||||
|
return ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT);
|
||||||
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast<bool>(m_sw_renderer); }
|
ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast<bool>(m_sw_renderer); }
|
||||||
|
|
||||||
void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
|
void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
|
||||||
|
|
|
@ -508,7 +508,8 @@ bool GPU_HW_D3D11::CompileShaders()
|
||||||
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
||||||
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
||||||
|
|
||||||
ShaderCompileProgressTracker progress("Compiling Shaders", 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1);
|
ShaderCompileProgressTracker progress("Compiling Shaders",
|
||||||
|
1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1);
|
||||||
|
|
||||||
// input layout
|
// input layout
|
||||||
{
|
{
|
||||||
|
@ -585,18 +586,19 @@ bool GPU_HW_D3D11::CompileShaders()
|
||||||
|
|
||||||
progress.Increment();
|
progress.Increment();
|
||||||
|
|
||||||
m_vram_fill_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFillFragmentShader());
|
for (u8 wrapped = 0; wrapped < 2; wrapped++)
|
||||||
if (!m_vram_fill_pixel_shader)
|
{
|
||||||
return false;
|
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||||
|
{
|
||||||
|
const std::string ps =
|
||||||
|
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced));
|
||||||
|
m_vram_fill_pixel_shaders[wrapped][interlaced] = shader_cache.GetPixelShader(m_device.Get(), ps);
|
||||||
|
if (!m_vram_fill_pixel_shaders[wrapped][interlaced])
|
||||||
|
return false;
|
||||||
|
|
||||||
progress.Increment();
|
progress.Increment();
|
||||||
|
}
|
||||||
m_vram_interlaced_fill_pixel_shader =
|
}
|
||||||
shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateInterlacedFillFragmentShader());
|
|
||||||
if (!m_vram_interlaced_fill_pixel_shader)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
progress.Increment();
|
|
||||||
|
|
||||||
m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader());
|
m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader());
|
||||||
if (!m_vram_read_pixel_shader)
|
if (!m_vram_read_pixel_shader)
|
||||||
|
@ -682,8 +684,7 @@ void GPU_HW_D3D11::DestroyShaders()
|
||||||
m_vram_copy_pixel_shader.Reset();
|
m_vram_copy_pixel_shader.Reset();
|
||||||
m_vram_write_pixel_shader.Reset();
|
m_vram_write_pixel_shader.Reset();
|
||||||
m_vram_read_pixel_shader.Reset();
|
m_vram_read_pixel_shader.Reset();
|
||||||
m_vram_interlaced_fill_pixel_shader.Reset();
|
m_vram_fill_pixel_shaders = {};
|
||||||
m_vram_fill_pixel_shader.Reset();
|
|
||||||
m_copy_pixel_shader.Reset();
|
m_copy_pixel_shader.Reset();
|
||||||
m_uv_quad_vertex_shader.Reset();
|
m_uv_quad_vertex_shader.Reset();
|
||||||
m_screen_quad_vertex_shader.Reset();
|
m_screen_quad_vertex_shader.Reset();
|
||||||
|
@ -976,26 +977,18 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
if (IsUsingSoftwareRendererForReadbacks())
|
if (IsUsingSoftwareRendererForReadbacks())
|
||||||
FillSoftwareRendererVRAM(x, y, width, height, color);
|
FillSoftwareRendererVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
|
||||||
{
|
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::FillVRAM(x, y, width, height, color);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::FillVRAM(x, y, width, height, color);
|
GPU_HW::FillVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
|
||||||
|
|
||||||
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
||||||
|
|
||||||
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
|
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
|
||||||
height * m_resolution_scale);
|
SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
|
||||||
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() :
|
bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
|
||||||
m_vram_fill_pixel_shader.Get(),
|
|
||||||
|
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
||||||
|
DrawUtilityShader(m_vram_fill_pixel_shaders[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
|
||||||
|
[BoolToUInt8(IsInterlacedRenderingEnabled())]
|
||||||
|
.Get(),
|
||||||
&uniforms, sizeof(uniforms));
|
&uniforms, sizeof(uniforms));
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
|
|
|
@ -123,8 +123,7 @@ private:
|
||||||
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
|
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
|
||||||
ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader;
|
ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
|
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_vram_fill_pixel_shaders; // [wrapped][interlaced]
|
||||||
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
|
|
||||||
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
||||||
|
|
|
@ -420,8 +420,8 @@ bool GPU_HW_D3D12::CompilePipelines()
|
||||||
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
||||||
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
||||||
|
|
||||||
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
|
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 +
|
||||||
2 + 2 + 1 + 1 + (2 * 3) + 1);
|
(2 * 2) + 2 + 2 + 1 + 1 + (2 * 3) + 1);
|
||||||
|
|
||||||
// vertex shaders - [textured]
|
// vertex shaders - [textured]
|
||||||
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
|
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
|
||||||
|
@ -561,23 +561,24 @@ bool GPU_HW_D3D12::CompilePipelines()
|
||||||
gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat());
|
gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat());
|
||||||
|
|
||||||
// VRAM fill
|
// VRAM fill
|
||||||
|
for (u8 wrapped = 0; wrapped < 2; wrapped++)
|
||||||
{
|
{
|
||||||
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||||
{
|
{
|
||||||
ComPtr<ID3DBlob> fs = shader_cache.GetPixelShader(
|
ComPtr<ID3DBlob> fs = shader_cache.GetPixelShader(
|
||||||
(interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader());
|
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
|
||||||
if (!fs)
|
if (!fs)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
gpbuilder.SetPixelShader(fs.Get());
|
gpbuilder.SetPixelShader(fs.Get());
|
||||||
gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS);
|
gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS);
|
||||||
|
|
||||||
m_vram_fill_pipelines[interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false);
|
m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false);
|
||||||
if (!m_vram_fill_pipelines[interlaced])
|
if (!m_vram_fill_pipelines[wrapped][interlaced])
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[interlaced].Get(), "VRAM Fill Pipeline Interlacing=%u",
|
D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[wrapped][interlaced].Get(),
|
||||||
interlaced);
|
"VRAM Fill Pipeline Wrapped=%u,Interlacing=%u", wrapped, interlaced);
|
||||||
|
|
||||||
progress.Increment();
|
progress.Increment();
|
||||||
}
|
}
|
||||||
|
@ -994,31 +995,22 @@ void GPU_HW_D3D12::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
FillSoftwareRendererVRAM(x, y, width, height, color);
|
FillSoftwareRendererVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
// TODO: Use fast clear
|
// TODO: Use fast clear
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
|
||||||
{
|
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::FillVRAM(x, y, width, height, color);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::FillVRAM(x, y, width, height, color);
|
GPU_HW::FillVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
x *= m_resolution_scale;
|
|
||||||
y *= m_resolution_scale;
|
|
||||||
width *= m_resolution_scale;
|
|
||||||
height *= m_resolution_scale;
|
|
||||||
|
|
||||||
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
||||||
|
|
||||||
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
|
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
|
||||||
cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get());
|
cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get());
|
||||||
cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0);
|
cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0);
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor());
|
cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor());
|
||||||
cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())].Get());
|
cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
|
||||||
D3D12::SetViewportAndScissor(cmdlist, x, y, width, height);
|
[BoolToUInt8(IsInterlacedRenderingEnabled())]
|
||||||
|
.Get());
|
||||||
|
|
||||||
|
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
|
||||||
|
D3D12::SetViewportAndScissor(cmdlist, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
|
||||||
|
bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
|
||||||
|
|
||||||
cmdlist->DrawInstanced(3, 1, 0, 0);
|
cmdlist->DrawInstanced(3, 1, 0, 0);
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
|
|
|
@ -92,8 +92,8 @@ private:
|
||||||
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
||||||
DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2, 5, 9, 4, 2> m_batch_pipelines;
|
DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2, 5, 9, 4, 2> m_batch_pipelines;
|
||||||
|
|
||||||
// [interlaced]
|
// [wrapped][interlaced]
|
||||||
std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_fill_pipelines;
|
DimensionalArray<ComPtr<ID3D12PipelineState>, 2, 2> m_vram_fill_pipelines;
|
||||||
|
|
||||||
// [depth_test]
|
// [depth_test]
|
||||||
std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_write_pipelines;
|
std::array<ComPtr<ID3D12PipelineState>, 2> m_vram_write_pipelines;
|
||||||
|
|
|
@ -517,7 +517,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
|
||||||
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
||||||
|
|
||||||
ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + 1 + 1 + 1 + 1 + 1 + 1);
|
ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + (2 * 2) + 1 + 1 + 1 + 1 + 1);
|
||||||
|
|
||||||
for (u32 render_mode = 0; render_mode < 4; render_mode++)
|
for (u32 render_mode = 0; render_mode < 4; render_mode++)
|
||||||
{
|
{
|
||||||
|
@ -609,22 +609,29 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<GL::Program> prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
for (u8 wrapped = 0; wrapped < 2; wrapped++)
|
||||||
shadergen.GenerateInterlacedFillFragmentShader(),
|
{
|
||||||
[this, use_binding_layout](GL::Program& prog) {
|
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||||
if (!IsGLES() && !use_binding_layout)
|
{
|
||||||
prog.BindFragData(0, "o_col0");
|
std::optional<GL::Program> prog = shader_cache.GetProgram(
|
||||||
});
|
shadergen.GenerateScreenQuadVertexShader(), {},
|
||||||
if (!prog)
|
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)),
|
||||||
return false;
|
[this, use_binding_layout](GL::Program& prog) {
|
||||||
|
if (!IsGLES() && !use_binding_layout)
|
||||||
|
prog.BindFragData(0, "o_col0");
|
||||||
|
});
|
||||||
|
if (!prog)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (!use_binding_layout)
|
if (!use_binding_layout)
|
||||||
prog->BindUniformBlock("UBOBlock", 1);
|
prog->BindUniformBlock("UBOBlock", 1);
|
||||||
|
|
||||||
m_vram_interlaced_fill_program = std::move(*prog);
|
m_vram_fill_programs[wrapped][interlaced] = std::move(*prog);
|
||||||
progress.Increment();
|
progress.Increment();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
prog =
|
std::optional<GL::Program> prog =
|
||||||
shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMReadFragmentShader(),
|
shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMReadFragmentShader(),
|
||||||
[this, use_binding_layout](GL::Program& prog) {
|
[this, use_binding_layout](GL::Program& prog) {
|
||||||
if (!IsGLES() && !use_binding_layout)
|
if (!IsGLES() && !use_binding_layout)
|
||||||
|
@ -1014,28 +1021,17 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
if (IsUsingSoftwareRendererForReadbacks())
|
if (IsUsingSoftwareRendererForReadbacks())
|
||||||
FillSoftwareRendererVRAM(x, y, width, height, color);
|
FillSoftwareRendererVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
|
||||||
{
|
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::FillVRAM(x, y, width, height, color);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::FillVRAM(x, y, width, height, color);
|
GPU_HW::FillVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
// scale coordinates
|
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
|
||||||
x *= m_resolution_scale;
|
glScissor(bounds.left * m_resolution_scale,
|
||||||
y *= m_resolution_scale;
|
m_vram_texture.GetHeight() - (bounds.top * m_resolution_scale) - (height * m_resolution_scale),
|
||||||
width *= m_resolution_scale;
|
width * m_resolution_scale, height * m_resolution_scale);
|
||||||
height *= m_resolution_scale;
|
|
||||||
|
|
||||||
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
|
|
||||||
|
|
||||||
// fast path when not using interlaced rendering
|
// fast path when not using interlaced rendering
|
||||||
if (!IsInterlacedRenderingEnabled())
|
const bool wrapped = IsVRAMFillOversized(x, y, width, height);
|
||||||
|
const bool interlaced = IsInterlacedRenderingEnabled();
|
||||||
|
if (!wrapped && !interlaced)
|
||||||
{
|
{
|
||||||
const auto [r, g, b, a] =
|
const auto [r, g, b, a] =
|
||||||
RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
|
RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
|
||||||
|
@ -1048,7 +1044,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
{
|
{
|
||||||
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
||||||
|
|
||||||
m_vram_interlaced_fill_program.Bind();
|
m_vram_fill_programs[BoolToUInt8(wrapped)][BoolToUInt8(interlaced)].Bind();
|
||||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||||
glDisable(GL_BLEND);
|
glDisable(GL_BLEND);
|
||||||
SetDepthFunc(GL_ALWAYS);
|
SetDepthFunc(GL_ALWAYS);
|
||||||
|
|
|
@ -99,7 +99,7 @@ private:
|
||||||
std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
|
std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
|
||||||
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
|
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
|
||||||
std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced]
|
std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced]
|
||||||
GL::Program m_vram_interlaced_fill_program;
|
std::array<std::array<GL::Program, 2>, 2> m_vram_fill_programs;
|
||||||
GL::Program m_vram_read_program;
|
GL::Program m_vram_read_program;
|
||||||
GL::Program m_vram_write_program;
|
GL::Program m_vram_write_program;
|
||||||
GL::Program m_vram_copy_program;
|
GL::Program m_vram_copy_program;
|
||||||
|
|
|
@ -997,27 +997,6 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
WriteHeader(ss);
|
|
||||||
WriteCommonFunctions(ss);
|
|
||||||
DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
|
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
{
|
|
||||||
if ((fixYCoord(uint(v_pos.y)) & 1u) == u_interlaced_displayed_field)
|
|
||||||
discard;
|
|
||||||
|
|
||||||
o_col0 = u_fill_color;
|
|
||||||
o_depth = u_fill_color.a;
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit,
|
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit,
|
||||||
GPU_HW::InterlacedRenderMode interlace_mode,
|
GPU_HW::InterlacedRenderMode interlace_mode,
|
||||||
bool smooth_chroma)
|
bool smooth_chroma)
|
||||||
|
@ -1324,6 +1303,50 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced)
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
WriteHeader(ss);
|
||||||
|
WriteCommonFunctions(ss);
|
||||||
|
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
|
||||||
|
DefineMacro(ss, "WRAPPED", wrapped);
|
||||||
|
DefineMacro(ss, "INTERLACED", interlaced);
|
||||||
|
|
||||||
|
DeclareUniformBuffer(
|
||||||
|
ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
|
||||||
|
|
||||||
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, true, false, false, false);
|
||||||
|
ss << R"(
|
||||||
|
{
|
||||||
|
#if INTERLACED || WRAPPED
|
||||||
|
uint2 dst_coords = uint2(uint(v_pos.x), fixYCoord(uint(v_pos.y)));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if INTERLACED
|
||||||
|
if ((dst_coords.y & 1u) == u_interlaced_displayed_field)
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if WRAPPED
|
||||||
|
// make sure it's not oversized and out of range
|
||||||
|
if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) ||
|
||||||
|
(dst_coords.y < u_dst_coords.y && dst_coords.y >= u_end_coords.y))
|
||||||
|
{
|
||||||
|
discard;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
o_col0 = u_fill_color;
|
||||||
|
#if !PGXP_DEPTH
|
||||||
|
o_depth = u_fill_color.a;
|
||||||
|
#else
|
||||||
|
o_depth = 1.0f;
|
||||||
|
#endif
|
||||||
|
})";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
|
std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
|
|
@ -13,12 +13,12 @@ public:
|
||||||
std::string GenerateBatchVertexShader(bool textured);
|
std::string GenerateBatchVertexShader(bool textured);
|
||||||
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
|
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
|
||||||
bool dithering, bool interlacing);
|
bool dithering, bool interlacing);
|
||||||
std::string GenerateInterlacedFillFragmentShader();
|
|
||||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
|
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
|
||||||
bool smooth_chroma);
|
bool smooth_chroma);
|
||||||
std::string GenerateVRAMReadFragmentShader();
|
std::string GenerateVRAMReadFragmentShader();
|
||||||
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
||||||
std::string GenerateVRAMCopyFragmentShader();
|
std::string GenerateVRAMCopyFragmentShader();
|
||||||
|
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced);
|
||||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||||
|
|
||||||
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
|
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
|
||||||
|
|
|
@ -840,7 +840,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
m_pgxp_depth_buffer, m_supports_dual_source_blend);
|
||||||
|
|
||||||
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
|
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
|
||||||
2 + 2 + 1 + 1 + (2 * 3) + 1);
|
(2 * 2) + 2 + 1 + 1 + (2 * 3) + 1);
|
||||||
|
|
||||||
// vertex shaders - [textured]
|
// vertex shaders - [textured]
|
||||||
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
|
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
|
||||||
|
@ -1010,11 +1010,12 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
gpbuilder.SetMultisamples(m_multisamples, false);
|
gpbuilder.SetMultisamples(m_multisamples, false);
|
||||||
|
|
||||||
// VRAM fill
|
// VRAM fill
|
||||||
|
for (u8 wrapped = 0; wrapped < 2; wrapped++)
|
||||||
{
|
{
|
||||||
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||||
{
|
{
|
||||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(
|
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(
|
||||||
(interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader());
|
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
|
||||||
if (fs == VK_NULL_HANDLE)
|
if (fs == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -1022,9 +1023,9 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
gpbuilder.SetFragmentShader(fs);
|
gpbuilder.SetFragmentShader(fs);
|
||||||
gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS);
|
gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS);
|
||||||
|
|
||||||
m_vram_fill_pipelines[interlaced] = gpbuilder.Create(device, pipeline_cache, false);
|
m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(device, pipeline_cache, false);
|
||||||
vkDestroyShaderModule(device, fs, nullptr);
|
vkDestroyShaderModule(device, fs, nullptr);
|
||||||
if (m_vram_fill_pipelines[interlaced] == VK_NULL_HANDLE)
|
if (m_vram_fill_pipelines[wrapped][interlaced] == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
progress.Increment();
|
progress.Increment();
|
||||||
|
@ -1249,8 +1250,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
|
||||||
{
|
{
|
||||||
m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
|
m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
|
||||||
|
|
||||||
for (VkPipeline& p : m_vram_fill_pipelines)
|
m_vram_fill_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
|
||||||
Vulkan::Util::SafeDestroyPipeline(p);
|
|
||||||
|
|
||||||
for (VkPipeline& p : m_vram_write_pipelines)
|
for (VkPipeline& p : m_vram_write_pipelines)
|
||||||
Vulkan::Util::SafeDestroyPipeline(p);
|
Vulkan::Util::SafeDestroyPipeline(p);
|
||||||
|
@ -1482,23 +1482,8 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
if (IsUsingSoftwareRendererForReadbacks())
|
if (IsUsingSoftwareRendererForReadbacks())
|
||||||
FillSoftwareRendererVRAM(x, y, width, height, color);
|
FillSoftwareRendererVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
|
||||||
{
|
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::FillVRAM(x, y, width, height, color);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::FillVRAM(x, y, width, height, color);
|
GPU_HW::FillVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
x *= m_resolution_scale;
|
|
||||||
y *= m_resolution_scale;
|
|
||||||
width *= m_resolution_scale;
|
|
||||||
height *= m_resolution_scale;
|
|
||||||
|
|
||||||
BeginVRAMRenderPass();
|
BeginVRAMRenderPass();
|
||||||
|
|
||||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||||
|
@ -1506,8 +1491,12 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
||||||
&uniforms);
|
&uniforms);
|
||||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||||
m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]);
|
m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
|
||||||
Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height);
|
[BoolToUInt8(IsInterlacedRenderingEnabled())]);
|
||||||
|
|
||||||
|
const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
|
||||||
|
Vulkan::Util::SetViewportAndScissor(cmdbuf, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
|
||||||
|
bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
|
||||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
|
|
|
@ -129,8 +129,8 @@ private:
|
||||||
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
||||||
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};
|
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};
|
||||||
|
|
||||||
// [interlaced]
|
// [wrapped][interlaced]
|
||||||
std::array<VkPipeline, 2> m_vram_fill_pipelines{};
|
DimensionalArray<VkPipeline, 2, 2> m_vram_fill_pipelines{};
|
||||||
|
|
||||||
// [depth_test]
|
// [depth_test]
|
||||||
std::array<VkPipeline, 2> m_vram_write_pipelines{};
|
std::array<VkPipeline, 2> m_vram_write_pipelines{};
|
||||||
|
|
Loading…
Reference in New Issue