GPU/HW: Support using ROV for accurate blending

This commit is contained in:
Stenzek 2024-07-22 01:12:33 +10:00
parent 104341b9bf
commit 25292d94fd
No known key found for this signature in database
15 changed files with 639 additions and 286 deletions

View File

@ -34,7 +34,7 @@ namespace GameDatabase {
enum : u32
{
GAME_DATABASE_CACHE_SIGNATURE = 0x45434C48,
GAME_DATABASE_CACHE_VERSION = 11,
GAME_DATABASE_CACHE_VERSION = 12,
};
static Entry* GetMutableEntry(std::string_view serial);
@ -64,6 +64,7 @@ static constexpr const std::array<const char*, static_cast<u32>(GameDatabase::Tr
"ForceSoftwareRenderer",
"ForceSoftwareRendererForReadbacks",
"ForceRoundTextureCoordinates",
"ForceAccurateBlending",
"ForceInterlacing",
"DisableTrueColor",
"DisableUpscaling",
@ -492,6 +493,14 @@ void GameDatabase::Entry::ApplySettings(Settings& settings, bool display_osd_mes
settings.gpu_force_round_texcoords = true;
}
// Entries carrying the ForceAccurateBlending trait force the accurate blending setting on.
if (HasTrait(Trait::ForceAccurateBlending))
{
// Only post the OSD notice when messages were requested, the software renderer is not in use,
// and the setting was not already enabled (i.e. this trait is what actually changes it).
if (display_osd_messages && !settings.IsUsingSoftwareRenderer() && !settings.gpu_accurate_blending)
APPEND_MESSAGE(ICON_FA_MAGIC, TRANSLATE_SV("GameDatabase", "Accurate blending enabled."));
// Applied unconditionally, regardless of whether a message was shown.
settings.gpu_accurate_blending = true;
}
if (HasTrait(Trait::ForceInterlacing))
{
if (display_osd_messages && settings.gpu_disable_interlacing)

View File

@ -32,6 +32,7 @@ enum class Trait : u32
ForceSoftwareRenderer,
ForceSoftwareRendererForReadbacks,
ForceRoundUpscaledTextureCoordinates,
ForceAccurateBlending,
ForceInterlacing,
DisableTrueColor,
DisableUpscaling,

View File

@ -37,10 +37,29 @@ Log_SetChannel(GPU_HW);
static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F;
static constexpr GPUTexture::Format VRAM_DS_EXTRACT_FORMAT = GPUTexture::Format::R32F;
static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F;
#ifdef _DEBUG
static u32 s_draw_number = 0;
// Display names for the per-draw debug annotations, indexed by static_cast<u8>(m_batch.transparency_mode).
// NOTE(review): entry order presumably mirrors GPUTransparencyMode - confirm before reordering either.
static constexpr const std::array s_transparency_modes = {
"HalfBackgroundPlusHalfForeground",
"BackgroundPlusForeground",
"BackgroundMinusForeground",
"BackgroundPlusQuarterForeground",
"Disabled",
};
// Display names indexed by the batch texture mode value passed to the draw.
// NOTE(review): entry order presumably mirrors BatchTextureMode (sprite variants last) - confirm.
static constexpr const std::array s_batch_texture_modes = {
"Palette4Bit", "Palette8Bit", "Direct16Bit", "Disabled",
"SpritePalette4Bit", "SpritePalette8Bit", "SpriteDirect16Bit",
};
// Display names indexed by static_cast<u8>(render_mode).
// NOTE(review): entry order presumably mirrors BatchRenderMode - confirm.
static constexpr const std::array s_batch_render_modes = {
"TransparencyDisabled", "TransparentAndOpaque", "OnlyOpaque", "OnlyTransparent", "ShaderBlend",
};
#endif
/// Returns the distance between two rectangles.
@ -370,9 +389,9 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
const u8 resolution_scale = Truncate8(CalculateResolutionScale());
const u8 multisamples = Truncate8(std::min<u32>(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples()));
const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering);
const bool framebuffer_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
(static_cast<bool>(m_vram_depth_texture) != (g_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch)));
const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() ||
m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());
const bool shaders_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_true_color != g_settings.gpu_true_color || g_settings.gpu_debanding != old_settings.gpu_debanding ||
@ -380,6 +399,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
(resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) ||
(resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest &&
g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) ||
g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() ||
m_texture_filtering != g_settings.gpu_texture_filter ||
m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs ||
(resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode ||
@ -442,24 +462,16 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering);
m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode);
CheckSettings();
if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer())
const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());
if (depth_buffer_changed)
{
m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer();
m_batch.use_depth_buffer = false;
m_depth_was_copied = false;
// might be null when resizing
if (m_vram_depth_texture)
{
if (m_pgxp_depth_buffer)
ClearDepthBuffer();
else
UpdateDepthBufferFromMaskBit();
}
}
CheckSettings();
UpdateSoftwareRenderer(true);
PrintSettingsToLog();
@ -489,9 +501,17 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
UpdateDownsamplingLevels();
RestoreDeviceContext();
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false);
UpdateDepthBufferFromMaskBit();
if (m_write_mask_as_depth)
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
}
else if (m_vram_depth_texture && depth_buffer_changed)
{
if (m_pgxp_depth_buffer)
ClearDepthBuffer();
else if (m_write_mask_as_depth)
UpdateDepthBufferFromMaskBit();
}
if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode ||
(g_settings.gpu_downsample_mode == GPUDownsampleMode::Box &&
@ -536,6 +556,37 @@ void GPU_HW::CheckSettings()
m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering);
}
// Accurate blending needs one of: framebuffer fetch, feedback loops, or rasterizer order views.
if (g_settings.IsUsingAccurateBlending() && !m_supports_framebuffer_fetch && !features.feedback_loops &&
    !features.raster_order_views)
{
  // m_allow_shader_blend/m_prefer_shader_blend will be cleared in pipeline compile.
  Host::AddIconOSDMessage(
    "AccurateBlendingUnsupported", ICON_FA_PAINT_BRUSH,
    TRANSLATE_STR("GPU_HW", "Accurate blending is not supported by your current GPU.\nIt requires framebuffer fetch, "
                            "feedback loops, or rasterizer order views."),
    Host::OSD_WARNING_DURATION);
}
// Without framebuffer fetch, ROV gets used for accurate blending, or for PGXP depth when feedback loops are
// unavailable. Multisampling is dropped to 1x in that case.
else if (IsUsingMultisampling() && !features.framebuffer_fetch &&
         ((g_settings.IsUsingAccurateBlending() && features.raster_order_views) ||
          (m_pgxp_depth_buffer && features.raster_order_views && !features.feedback_loops)))
{
  Host::AddIconOSDMessage(
    "AccurateBlendingUnsupported", ICON_FA_PAINT_BRUSH,
    TRANSLATE_STR("GPU_HW", "Multisample anti-aliasing is not supported when using ROV blending."),
    Host::OSD_WARNING_DURATION);
  m_multisamples = 1;
}

// PGXP depth buffering has the same hardware requirements; disable it when none are available.
if (m_pgxp_depth_buffer && !features.feedback_loops && !features.framebuffer_fetch && !features.raster_order_views)
{
  // Uses its own OSD key: this warning can be raised in the same call as the accurate blending warning above
  // (no FBFetch/feedback loops/ROV with both options enabled), and sharing a key would presumably let this
  // message replace that one. NOTE(review): confirm keyed OSD message semantics.
  Host::AddIconOSDMessage(
    "PGXPDepthBufferUnsupported", ICON_FA_PAINT_BRUSH,
    TRANSLATE_STR("GPU_HW", "PGXP depth buffer is not supported by your current GPU or renderer.\nIt requires "
                            "framebuffer fetch, feedback loops, or rasterizer order views."),
    Host::OSD_WARNING_DURATION);
  m_pgxp_depth_buffer = false;
}
if (!features.noperspective_interpolation && !ShouldDisableColorPerspective())
WARNING_LOG("Disable color perspective not supported, but should be used.");
@ -734,16 +785,11 @@ void GPU_HW::PrintSettingsToLog()
INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? "YES" : "NO");
}
bool GPU_HW::NeedsDepthBuffer() const
{
// PGXP depth, or no fbfetch, which means we need depth for the mask bit.
return (m_pgxp_depth_buffer || !m_supports_framebuffer_fetch);
}
GPUTexture::Format GPU_HW::GetDepthBufferFormat() const
{
// Use 32-bit depth for PGXP depth buffer, otherwise 16-bit for mask bit.
return m_pgxp_depth_buffer ? VRAM_DS_DEPTH_FORMAT : VRAM_DS_FORMAT;
return m_pgxp_depth_buffer ? (m_use_rov_for_shader_blend ? VRAM_DS_COLOR_FORMAT : VRAM_DS_DEPTH_FORMAT) :
VRAM_DS_FORMAT;
}
bool GPU_HW::CreateBuffers()
@ -754,22 +800,25 @@ bool GPU_HW::CreateBuffers()
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const u8 samples = static_cast<u8>(m_multisamples);
const bool needs_depth_buffer = NeedsDepthBuffer();
DEV_LOG("Depth buffer is {}needed in {}", needs_depth_buffer ? "" : "NOT ",
GPUTexture::GetFormatName(GetDepthBufferFormat()));
const bool needs_depth_buffer = m_write_mask_as_depth || m_pgxp_depth_buffer;
// Needed for Metal resolve.
const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ?
GPUTexture::Type::RWTexture :
GPUTexture::Type::Texture;
const GPUTexture::Type vram_texture_type =
m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget;
const GPUTexture::Type depth_texture_type =
m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::DepthStencil;
if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, vram_texture_type,
VRAM_RT_FORMAT)) ||
(needs_depth_buffer &&
(!(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::DepthStencil, GetDepthBufferFormat())) ||
!(m_vram_depth_copy_texture = g_gpu_device->FetchTexture(
texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT)))) ||
!(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
depth_texture_type, GetDepthBufferFormat()))) ||
(m_pgxp_depth_buffer && !(m_vram_depth_copy_texture =
g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) ||
!(m_vram_read_texture =
g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) ||
!(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1,
@ -826,15 +875,43 @@ void GPU_HW::ClearFramebuffer()
{
g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0);
if (m_vram_depth_texture)
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
{
if (m_use_rov_for_shader_blend)
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
else
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
}
ClearVRAMDirtyRectangle();
m_last_depth_z = 1.0f;
}
void GPU_HW::SetVRAMRenderTarget()
{
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(),
m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags);
if (!m_use_rov_for_shader_blend)
{
  // Regular path: colour + depth attachment. A colour feedback loop is only requested when shader blending
  // is allowed and neither framebuffer fetch nor ROV is used for it.
  const bool wants_feedback_loop =
    (m_allow_shader_blend && !m_supports_framebuffer_fetch && !m_use_rov_for_shader_blend);
  g_gpu_device->SetRenderTarget(
    m_vram_texture.get(), m_vram_depth_texture.get(),
    (wants_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags));
}
else
{
  // ROV path: no depth-stencil attachment. The depth texture is passed as a second colour target,
  // but only when the PGXP depth buffer is enabled.
  GPUTexture* targets[2] = {m_vram_texture.get(), m_vram_depth_texture.get()};
  const u32 target_count = m_pgxp_depth_buffer ? 2 : 1;

  // Targets are only bound as images while ROV drawing is active.
  g_gpu_device->SetRenderTargets(
    targets, target_count, nullptr,
    m_rov_active ? GPUPipeline::BindRenderTargetsAsImages : GPUPipeline::NoRenderPassFlags);
}
}
/// Leaves ROV drawing mode if it is currently active; otherwise does nothing.
/// Clears m_rov_active and re-applies the VRAM render target so it is rebound using the inactive-ROV flags.
void GPU_HW::DeactivateROV()
{
  if (m_rov_active)
  {
    GL_INS("Deactivating ROV.");

    // The flag must be cleared before rebinding, since SetVRAMRenderTarget() reads it to choose the pass flags.
    m_rov_active = false;
    SetVRAMRenderTarget();
  }
}
void GPU_HW::DestroyBuffers()
@ -863,42 +940,73 @@ bool GPU_HW::CompilePipelines(Error* error)
const bool per_sample_shading = g_settings.gpu_per_sample_shading && features.per_sample_shading;
const bool force_round_texcoords = (m_resolution_scale > 1 && m_texture_filtering == GPUTextureFilter::Nearest &&
g_settings.gpu_force_round_texcoords);
const bool needs_depth_buffer = NeedsDepthBuffer();
const bool write_mask_as_depth = (!m_pgxp_depth_buffer && needs_depth_buffer);
// Determine when to use shader blending.
// FBFetch is free, we need it for filtering without DSB, or when accurate blending is forced.
// But, don't bother with accurate blending if true colour is on. The result will be the same.
// Prefer ROV over barriers/feedback loops without FBFetch, it'll be faster.
// Abuse the depth buffer for the mask bit when it's free (FBFetch), or PGXP depth buffering is enabled.
m_allow_shader_blend = (features.feedback_loops || features.raster_order_views || features.framebuffer_fetch) &&
(m_pgxp_depth_buffer || g_settings.gpu_accurate_blending ||
(!m_supports_dual_source_blend && (IsBlendedTextureFiltering(m_texture_filtering) ||
IsBlendedTextureFiltering(m_sprite_texture_filtering))));
m_prefer_shader_blend = (m_allow_shader_blend && g_settings.gpu_accurate_blending && !g_settings.gpu_true_color);
m_use_rov_for_shader_blend = (m_allow_shader_blend && !features.framebuffer_fetch && features.raster_order_views &&
(m_prefer_shader_blend || !features.feedback_loops));
m_write_mask_as_depth = (!m_pgxp_depth_buffer && !features.framebuffer_fetch && !m_prefer_shader_blend);
// ROV doesn't support MSAA in DirectX.
Assert(!m_use_rov_for_shader_blend || !IsUsingMultisampling());
const bool needs_depth_buffer = (m_pgxp_depth_buffer || m_write_mask_as_depth);
const bool needs_rov_depth = (m_pgxp_depth_buffer && m_use_rov_for_shader_blend);
const bool needs_real_depth_buffer = (needs_depth_buffer && !needs_rov_depth);
const bool needs_feedback_loop = (m_allow_shader_blend && features.feedback_loops && !m_use_rov_for_shader_blend);
const GPUTexture::Format depth_buffer_format =
needs_depth_buffer ? GetDepthBufferFormat() : GPUTexture::Format::Unknown;
m_allow_shader_blend = (features.feedback_loops && (m_pgxp_depth_buffer || !needs_depth_buffer));
// Logging in case something goes wrong.
INFO_LOG("Shader blending allowed: {}", m_allow_shader_blend ? "YES" : "NO");
INFO_LOG("Shader blending preferred: {}", m_prefer_shader_blend ? "YES" : "NO");
INFO_LOG("Use ROV for shader blending: {}", m_use_rov_for_shader_blend ? "YES" : "NO");
INFO_LOG("Write mask as depth: {}", m_write_mask_as_depth ? "YES" : "NO");
INFO_LOG("Depth buffer is {}needed in {}.", needs_depth_buffer ? "" : "NOT ",
GPUTexture::GetFormatName(GetDepthBufferFormat()));
INFO_LOG("Using ROV depth: {}", needs_rov_depth ? "YES" : "NO");
INFO_LOG("Using real depth buffer: {}", needs_real_depth_buffer ? "YES" : "NO");
INFO_LOG("Using feedback loops: {}", needs_feedback_loop ? "YES" : "NO");
// Start generating shaders.
GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, per_sample_shading,
m_true_color, (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering),
write_mask_as_depth, ShouldDisableColorPerspective(), m_supports_dual_source_blend,
m_write_mask_as_depth, ShouldDisableColorPerspective(), m_supports_dual_source_blend,
m_supports_framebuffer_fetch, g_settings.gpu_true_color && g_settings.gpu_debanding);
const u32 active_texture_modes =
m_allow_sprite_mode ? NUM_TEXTURE_MODES :
(NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast<u32>(BatchTextureMode::SpriteStart)));
const u32 total_pipelines =
(m_allow_sprite_mode ? 5 : 3) + // vertex shaders
(active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
1 + // fullscreen quad VS
(2 * 2) + // vram fill
(1 + BoolToUInt32(write_mask_as_depth)) + // vram copy
(1 + BoolToUInt32(write_mask_as_depth)) + // vram write
1 + // vram write replacement
(needs_depth_buffer ? 1 : 0) + // mask -> depth
1 + // vram read
2 + // extract/display
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
(m_allow_sprite_mode ? 5 : 3) + // vertex shaders
(active_texture_modes * 5 * 9 * 2 * 2 * 2 * (1 + BoolToUInt32(needs_rov_depth))) + // fragment shaders
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
1 + // fullscreen quad VS
(2 * 2) + // vram fill
(1 + BoolToUInt32(m_write_mask_as_depth)) + // vram copy
(1 + BoolToUInt32(m_write_mask_as_depth)) + // vram write
1 + // vram write replacement
(m_write_mask_as_depth ? 1 : 0) + // mask -> depth
1 + // vram read
2 + // extract/display
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
// vertex shaders - [textured/palette/sprite]
// fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
// fragment shaders - [depth_test][render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2> batch_vertex_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> batch_fragment_shaders{};
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(destroy_shader);
batch_fragment_shaders.enumerate(destroy_shader);
@ -924,56 +1032,71 @@ bool GPU_HW::CompilePipelines(Error* error)
}
}
for (u8 render_mode = 0; render_mode < 5; render_mode++)
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
if (depth_test && !needs_rov_depth)
{
if (
// Can't generate shader blending.
((render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !features.feedback_loops) ||
(render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend) &&
transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled))) ||
// Don't need multipass shaders.
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
{
progress.Increment(active_texture_modes * 2 * 2 * 2);
continue;
}
// Don't need to do depth testing in the shader.
continue;
}
for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
for (u8 render_mode = 0; render_mode < 5; render_mode++)
{
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
for (u8 check_mask = 0; check_mask < 2; check_mask++)
if (
// Can't generate shader blending.
((render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) ||
(render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend) &&
transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled))) ||
// Don't need multipass shaders if we're preferring shader blend or have (free) FBFetch.
((m_supports_framebuffer_fetch || m_prefer_shader_blend) &&
(render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))) ||
// If using ROV depth, we only draw with shader blending.
(needs_rov_depth && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend)))
{
if (check_mask && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend))
{
// mask bit testing is only valid with shader blending.
progress.Increment(2 * 2);
continue;
}
progress.Increment(active_texture_modes * 2 * 2 * 2);
continue;
}
for (u8 dithering = 0; dithering < 2; dithering++)
for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
{
for (u8 check_mask = 0; check_mask < 2; check_mask++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
if (check_mask && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend))
{
const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const BatchTextureMode shader_texmode = static_cast<BatchTextureMode>(
texture_mode - (sprite ? static_cast<u8>(BatchTextureMode::SpriteStart) : 0));
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTransparencyMode>(transparency_mode),
shader_texmode, sprite ? m_sprite_texture_filtering : m_texture_filtering, uv_limits,
!sprite && force_round_texcoords, ConvertToBoolUnchecked(dithering),
ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask));
// mask bit testing is only valid with shader blending.
progress.Increment(2 * 2);
continue;
}
if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][check_mask][dithering]
[interlacing] = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error)))
for (u8 dithering = 0; dithering < 2; dithering++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
return false;
}
const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const BatchTextureMode shader_texmode = static_cast<BatchTextureMode>(
texture_mode - (sprite ? static_cast<u8>(BatchTextureMode::SpriteStart) : 0));
const bool use_rov =
(render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && m_use_rov_for_shader_blend);
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTransparencyMode>(transparency_mode),
shader_texmode, sprite ? m_sprite_texture_filtering : m_texture_filtering, uv_limits,
!sprite && force_round_texcoords, ConvertToBoolUnchecked(dithering),
ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask), use_rov, needs_rov_depth,
(depth_test != 0));
progress.Increment();
if (!(batch_fragment_shaders[depth_test][render_mode][transparency_mode][texture_mode][check_mask]
[dithering][interlacing] = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error)))
{
return false;
}
progress.Increment();
}
}
}
}
@ -1003,10 +1126,8 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.geometry_shader = nullptr;
plconfig.SetTargetFormats(VRAM_RT_FORMAT, depth_buffer_format);
plconfig.samples = m_multisamples;
plconfig.per_sample_shading = per_sample_shading;
plconfig.render_pass_flags = m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
// [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
@ -1026,8 +1147,11 @@ bool GPU_HW::CompilePipelines(Error* error)
// Can't generate shader blending.
(render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) ||
// Don't need multipass shaders.
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
((m_supports_framebuffer_fetch || m_prefer_shader_blend) &&
(render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))) ||
// If using ROV depth, we only draw with shader blending.
(needs_rov_depth && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend)))
{
progress.Increment(9 * 2 * 2 * 2);
continue;
@ -1049,12 +1173,12 @@ bool GPU_HW::CompilePipelines(Error* error)
static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::SpritePalette8Bit);
const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const bool use_rov =
(render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && m_use_rov_for_shader_blend);
const bool use_shader_blending =
(render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) &&
((textured &&
NeedsShaderBlending(static_cast<GPUTransparencyMode>(transparency_mode), (check_mask != 0))) ||
check_mask));
(use_rov || ((render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) &&
NeedsShaderBlending(static_cast<GPUTransparencyMode>(transparency_mode),
static_cast<BatchTextureMode>(texture_mode), (check_mask != 0)))));
plconfig.input_layout.vertex_attributes =
textured ?
(uv_limits ? std::span<const GPUPipeline::VertexAttribute>(
@ -1066,14 +1190,14 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.vertex_shader =
batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get();
plconfig.fragment_shader =
batch_fragment_shaders[render_mode]
batch_fragment_shaders[BoolToUInt8(depth_test && needs_rov_depth)][render_mode]
[use_shader_blending ? transparency_mode :
static_cast<u8>(GPUTransparencyMode::Disabled)]
[texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing]
.get();
Assert(plconfig.vertex_shader && plconfig.fragment_shader);
if (needs_depth_buffer)
if (needs_real_depth_buffer)
{
plconfig.depth.depth_test =
m_pgxp_depth_buffer ?
@ -1086,14 +1210,25 @@ bool GPU_HW::CompilePipelines(Error* error)
(depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
}
plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT,
needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format);
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
plconfig.render_pass_flags =
use_rov ? GPUPipeline::BindRenderTargetsAsImages :
(needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags);
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
if (!use_shader_blending &&
((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
(textured &&
IsBlendedTextureFiltering(sprite ? m_sprite_texture_filtering : m_texture_filtering))))
if (use_rov)
{
plconfig.blend.write_mask = 0;
}
else if (!use_shader_blending &&
((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
(textured &&
IsBlendedTextureFiltering(sprite ? m_sprite_texture_filtering : m_texture_filtering))))
{
plconfig.blend.enable = true;
plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One;
@ -1151,6 +1286,9 @@ bool GPU_HW::CompilePipelines(Error* error)
}
}
plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format);
plconfig.render_pass_flags = needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;
if (m_wireframe_mode != GPUWireframeMode::Disabled)
{
std::unique_ptr<GPUShader> gs = g_gpu_device->CreateShader(GPUShaderStage::Geometry, shadergen.GetLanguage(),
@ -1203,6 +1341,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
// VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
@ -1217,8 +1356,8 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.depth = needs_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
GPUPipeline::DepthState::GetNoTestsState();
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1237,10 +1376,10 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.fragment_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
if (depth_test && !write_mask_as_depth)
if (depth_test && !m_write_mask_as_depth)
continue;
plconfig.depth.depth_write = needs_depth_buffer;
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
@ -1268,10 +1407,10 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.fragment_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
if (depth_test && !write_mask_as_depth)
if (depth_test && !m_write_mask_as_depth)
continue;
plconfig.depth.depth_write = needs_depth_buffer;
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
@ -1301,10 +1440,8 @@ bool GPU_HW::CompilePipelines(Error* error)
progress.Increment();
}
plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
// VRAM update depth
if (needs_depth_buffer)
if (m_write_mask_as_depth)
{
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMUpdateDepthFragmentShader(), error);
@ -1325,6 +1462,7 @@ bool GPU_HW::CompilePipelines(Error* error)
}
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.samples = 1;
@ -1366,7 +1504,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.layout = depth_extract ? GPUPipeline::Layout::MultiTextureAndPushConstants :
GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.color_formats[1] = depth_extract ? VRAM_DS_EXTRACT_FORMAT : GPUTexture::Format::Unknown;
plconfig.color_formats[1] = depth_extract ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
if (!(m_vram_extract_pipeline[shader] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1385,7 +1523,7 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(VRAM_DS_EXTRACT_FORMAT);
plconfig.SetTargetFormats(VRAM_DS_COLOR_FORMAT);
if (!(m_copy_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
}
@ -1588,8 +1726,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
void GPU_HW::UpdateDepthBufferFromMaskBit()
{
if (m_pgxp_depth_buffer || !m_vram_depth_texture)
return;
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
// Viewport should already be set full, only need to fudge the scissor.
g_gpu_device->SetScissor(m_vram_texture->GetRect());
@ -1639,7 +1776,10 @@ void GPU_HW::ClearDepthBuffer()
{
GL_SCOPE("GPU_HW::ClearDepthBuffer()");
DebugAssert(m_pgxp_depth_buffer);
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f);
if (m_use_rov_for_shader_blend)
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
else
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f);
m_last_depth_z = 1.0f;
}
@ -1690,10 +1830,41 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode
render_mode)][texture_mode][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)][check_mask]
.get());
if (render_mode != BatchRenderMode::ShaderBlend || m_supports_framebuffer_fetch)
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
// Annotate the draw with the batch state that selected the pipeline above.
GL_INS_FMT("Texture mode: {}", s_batch_texture_modes[texture_mode]);
GL_INS_FMT("Transparency mode: {}", s_transparency_modes[static_cast<u8>(m_batch.transparency_mode)]);
GL_INS_FMT("Render mode: {}", s_batch_render_modes[static_cast<u8>(render_mode)]);
GL_INS_FMT("Mask bit test: {}", m_batch.check_mask_before_draw);
// Report the interlacing flag itself; this line previously repeated the mask-bit-test value.
GL_INS_FMT("Interlacing: {}", m_batch.interlacing);
// Activating ROV?
if (render_mode == BatchRenderMode::ShaderBlend)
{
if (m_use_rov_for_shader_blend)
{
if (!m_rov_active)
{
GL_INS("Activating ROV.");
m_rov_active = true;
SetVRAMRenderTarget();
}
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
}
else if (m_supports_framebuffer_fetch)
{
// No barriers needed for FBFetch.
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
}
else
{
// Barriers. Yucky.
g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full);
}
}
else
g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full);
{
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
}
}
ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
@ -2733,12 +2904,14 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const
(!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
}
ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const
ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture_mode,
bool check_mask) const
{
return (m_allow_shader_blend &&
((check_mask && (m_pgxp_depth_buffer || !m_vram_depth_texture)) ||
transparency == GPUTransparencyMode::BackgroundMinusForeground ||
(!m_supports_dual_source_blend &&
((check_mask && !m_write_mask_as_depth) ||
(transparency != GPUTransparencyMode::Disabled && m_prefer_shader_blend) ||
(transparency == GPUTransparencyMode::BackgroundMinusForeground) ||
(!m_supports_dual_source_blend && texture_mode != BatchTextureMode::Disabled &&
(transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering) ||
IsBlendedTextureFiltering(m_sprite_texture_filtering)))));
}
@ -2799,7 +2972,7 @@ void GPU_HW::ResetBatchVertexDepth()
{
DEV_LOG("Resetting batch vertex depth");
if (m_vram_depth_texture && !m_pgxp_depth_buffer)
if (m_write_mask_as_depth)
UpdateDepthBufferFromMaskBit();
m_current_depth = 1;
@ -2874,6 +3047,7 @@ void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) co
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color);
DeactivateROV();
if (m_sw_renderer)
{
@ -3027,6 +3201,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b
void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask,
bool check_mask, const GSVector4i bounds)
{
DeactivateROV();
std::unique_ptr<GPUTexture> upload_texture;
u32 map_index;
@ -3070,8 +3246,7 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
// the viewport should already be set to the full vram, so just adjust the scissor
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
g_gpu_device->SetPipeline(
m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer && NeedsDepthBuffer())].get());
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (upload_texture)
{
@ -3121,6 +3296,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write);
AddUnclampedDrawnRectangle(dst_bounds);
DeactivateROV();
struct VRAMCopyUBOData
{
u32 u_src_x;
@ -3149,8 +3326,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
g_gpu_device->SetPipeline(
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())]
.get());
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
RestoreDeviceContext();
@ -3285,8 +3461,8 @@ void GPU_HW::DispatchRenderCommand()
{
// transparency mode change
const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
if (transparency_mode != GPUTransparencyMode::Disabled &&
(texture_mode == BatchTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw)))
if (transparency_mode != GPUTransparencyMode::Disabled && !m_rov_active && !m_prefer_shader_blend &&
!NeedsShaderBlending(transparency_mode, texture_mode, check_mask_before_draw))
{
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
@ -3399,7 +3575,8 @@ void GPU_HW::FlushRender()
if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe)
{
if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.check_mask_before_draw))
if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.texture_mode, m_batch.check_mask_before_draw) ||
m_rov_active || (m_use_rov_for_shader_blend && m_pgxp_depth_buffer))
{
DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex);
}
@ -3416,6 +3593,8 @@ void GPU_HW::FlushRender()
if (m_wireframe_mode != GPUWireframeMode::Disabled)
{
// This'll be less than ideal, but wireframe is for debugging, so take the perf hit.
DeactivateROV();
g_gpu_device->SetPipeline(m_wireframe_pipeline.get());
g_gpu_device->DrawIndexed(index_count, base_index, base_vertex);
}
@ -3424,6 +3603,7 @@ void GPU_HW::FlushRender()
void GPU_HW::UpdateDisplay()
{
FlushRender();
DeactivateROV();
GL_SCOPE("UpdateDisplay()");
@ -3506,7 +3686,7 @@ void GPU_HW::UpdateDisplay()
((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width &&
m_vram_extract_depth_texture->GetHeight() == scaled_display_height) ||
!g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height,
GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT)))
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT)))
{
depth_source->MakeReadyForSampling();
g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get());

View File

@ -144,7 +144,6 @@ private:
std::numeric_limits<s32>::min());
/// Returns true if a depth buffer should be created.
bool NeedsDepthBuffer() const;
GPUTexture::Format GetDepthBufferFormat() const;
bool CreateBuffers();
@ -165,6 +164,7 @@ private:
void ClearDepthBuffer();
void SetScissor();
void SetVRAMRenderTarget();
void DeactivateROV();
void MapGPUBuffer(u32 required_vertices, u32 required_indices);
void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
@ -197,7 +197,7 @@ private:
bool NeedsTwoPassRendering() const;
/// Returns true if the draw is going to use shader blending/framebuffer fetch.
bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const;
bool NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture, bool check_mask) const;
void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
@ -281,8 +281,12 @@ private:
bool m_compute_uv_range : 1 = false;
bool m_allow_sprite_mode : 1 = false;
bool m_allow_shader_blend : 1 = false;
bool m_prefer_shader_blend : 1 = false;
bool m_use_rov_for_shader_blend : 1 = false;
bool m_write_mask_as_depth : 1 = false;
bool m_depth_was_copied : 1 = false;
bool m_texture_window_active : 1 = false;
bool m_rov_active : 1 = false;
u8 m_texpage_dirty = 0;

View File

@ -77,7 +77,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
{
DeclareVertexEntryPoint(
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
{{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}},
{{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"},
{"nointerpolation", "float4 v_uv_limits"}},
false, "", UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective);
}
else
@ -647,28 +648,26 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi
}
}
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode,
GPUTransparencyMode transparency,
GPU_HW::BatchTextureMode texture_mode,
GPUTextureFilter texture_filtering, bool uv_limits,
bool force_round_texcoords, bool dithering, bool interlacing,
bool check_mask)
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(
GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPU_HW::BatchTextureMode texture_mode,
GPUTextureFilter texture_filtering, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing,
bool check_mask, bool use_rov, bool use_rov_depth, bool rov_depth_test)
{
// TODO: don't write depth for shader blend
DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
DebugAssert(!rov_depth_test || (use_rov && use_rov_depth));
const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled);
const bool palette =
(texture_mode == GPU_HW::BatchTextureMode::Palette4Bit || texture_mode == GPU_HW::BatchTextureMode::Palette8Bit);
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
(transparency != GPUTransparencyMode::Disabled || check_mask));
const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend &&
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
const bool use_dual_source = (!shader_blending && !use_rov && m_supports_dual_source_blend &&
((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled &&
render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) ||
texture_filtering != GPUTextureFilter::Nearest));
std::stringstream ss;
WriteHeader(ss);
WriteHeader(ss, use_rov);
DefineMacro(ss, "TRANSPARENCY", render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled);
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque);
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent);
@ -687,6 +686,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TRUE_COLOR", m_true_color);
DefineMacro(ss, "TEXTURE_FILTERING", texture_filtering != GPUTextureFilter::Nearest);
DefineMacro(ss, "UV_LIMITS", uv_limits);
DefineMacro(ss, "USE_ROV", use_rov);
DefineMacro(ss, "USE_ROV_DEPTH", use_rov_depth);
DefineMacro(ss, "ROV_DEPTH_TEST", rov_depth_test);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords);
@ -696,6 +698,13 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
WriteBatchUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0);
if (use_rov)
{
DeclareImage(ss, "rov_color", 0);
if (use_rov_depth)
DeclareImage(ss, "rov_depth", 1, true);
}
if (m_glsl)
ss << "CONSTANT int[16] s_dither_values = int[16]( ";
else
@ -825,6 +834,7 @@ float3 ApplyDebanding(float2 frag_coord)
}
)";
const u32 num_fragment_outputs = use_rov ? 0 : (use_dual_source ? 2 : 1);
if (textured)
{
if (texture_filtering != GPUTextureFilter::Nearest)
@ -835,26 +845,29 @@ float3 ApplyDebanding(float2 frag_coord)
DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"},
{"nointerpolation", "float4 v_uv_limits"}},
true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective,
shader_blending && !use_rov, use_rov);
}
else
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}}, true,
use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
num_fragment_outputs, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective,
shader_blending && !use_rov, use_rov);
}
}
else
{
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth,
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth,
UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective,
shader_blending);
shader_blending && !use_rov, use_rov);
}
ss << R"(
{
uint3 vertcol = uint3(v_col0.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy));
uint2 fragpos = uint2(v_pos.xy);
bool semitransparent;
uint3 icolor;
@ -862,7 +875,7 @@ float3 ApplyDebanding(float2 frag_coord)
float oalpha;
#if INTERLACING
if ((uint(v_pos.y) & 1u) == u_interlaced_displayed_field)
if ((fragpos.y & 1u) == u_interlaced_displayed_field)
discard;
#endif
@ -891,7 +904,7 @@ float3 ApplyDebanding(float2 frag_coord)
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
icolor = (icolor * vertcol) >> 4;
#if DITHERING
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
icolor = ApplyDithering(fragpos, icolor);
#else
icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
#endif
@ -899,7 +912,7 @@ float3 ApplyDebanding(float2 frag_coord)
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy));
icolor = (icolor * vertcol) >> 7;
#if DITHERING
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
icolor = ApplyDithering(fragpos, icolor);
#else
icolor = min(icolor, uint3(255u, 255u, 255u));
#endif
@ -914,7 +927,7 @@ float3 ApplyDebanding(float2 frag_coord)
ialpha = 1.0;
#if DITHERING
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
icolor = ApplyDithering(fragpos, icolor);
#else
#if !TRUE_COLOR
icolor >>= 3;
@ -925,29 +938,34 @@ float3 ApplyDebanding(float2 frag_coord)
oalpha = float(u_set_mask_while_drawing);
#endif
// Premultiply alpha so we don't need to use a colour output for it.
float premultiply_alpha = ialpha;
#if TRANSPARENCY && !SHADER_BLENDING
premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0);
#endif
float3 color;
#if !TRUE_COLOR
// We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color
// into the blend unit, which can cause a small amount of error to accumulate.
color = floor(float3(icolor) * premultiply_alpha) / float3(31.0, 31.0, 31.0);
#else
// True color is actually simpler here since we want to preserve the precision.
color = (float3(icolor) * premultiply_alpha) / float3(255.0, 255.0, 255.0);
#endif
#if SHADER_BLENDING
float4 bg_col = LAST_FRAG_COLOR;
float4 fg_col = float4(color, oalpha);
#if USE_ROV
BEGIN_ROV_REGION;
float4 bg_col = ROV_LOAD(rov_color, fragpos);
float4 o_col0;
bool discarded = false;
#if CHECK_MASK_BIT
if (bg_col.a != 0.0)
discard;
#if ROV_DEPTH_TEST
float bg_depth = ROV_LOAD(rov_depth, fragpos).r;
discarded = (v_pos.z > bg_depth);
#endif
#if CHECK_MASK_BIT
discarded = discarded || (bg_col.a != 0.0);
#endif
#else
float4 bg_col = LAST_FRAG_COLOR;
#if CHECK_MASK_BIT
if (bg_col.a != 0.0)
discard;
#endif
#endif
// Work in normalized space for true colour, matches HW blend.
float4 fg_col = float4(float3(icolor), oalpha);
#if TRUE_COLOR
fg_col.rgb /= 255.0;
#elif TRANSPARENCY // rgb not used in check-mask only
bg_col.rgb = roundEven(bg_col.rgb * 31.0);
#endif
#if TEXTURE_FILTERING
@ -969,14 +987,87 @@ float3 ApplyDebanding(float2 frag_coord)
#else
o_col0.rgb = fg_col.rgb;
#endif
// 16-bit truncation.
#if !TRUE_COLOR && TRANSPARENCY
o_col0.rgb = floor(o_col0.rgb);
#endif
#if TRANSPARENCY
// If pixel isn't marked as semitransparent, replace with previous colour.
o_col0 = semitransparent ? o_col0 : fg_col;
#endif
#elif TRANSPARENCY && TEXTURED
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
if (semitransparent)
{
// Normalize for non-true-color.
#if !TRUE_COLOR
o_col0.rgb /= 31.0;
#endif
#if USE_ROV
if (!discarded)
{
ROV_STORE(rov_color, fragpos, o_col0);
#if USE_ROV_DEPTH
ROV_STORE(rov_depth, fragpos, float4(v_pos.z, 0.0, 0.0, 0.0));
#endif
}
END_ROV_REGION;
#endif
#else
// Premultiply alpha so we don't need to use a colour output for it.
float premultiply_alpha = ialpha;
#if TRANSPARENCY
premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0);
#endif
float3 color;
#if !TRUE_COLOR
// We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color
// into the blend unit, which can cause a small amount of error to accumulate.
color = floor(float3(icolor) * premultiply_alpha) / 31.0;
#else
// True color is actually simpler here since we want to preserve the precision.
color = (float3(icolor) * premultiply_alpha) / 255.0;
#endif
#if TRANSPARENCY && TEXTURED
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
if (semitransparent)
{
#if USE_DUAL_SOURCE
o_col0 = float4(color, oalpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha);
#else
o_col0 = float4(color, oalpha);
#endif
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
}
else
{
#if USE_DUAL_SOURCE
o_col0 = float4(color, oalpha);
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#else
o_col0 = float4(color, oalpha);
#endif
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#if TRANSPARENCY_ONLY_TRANSPARENT
discard;
#endif
}
#elif TRANSPARENCY
// We shouldn't be rendering opaque geometry only when untextured, so no need to test/discard here.
#if USE_DUAL_SOURCE
o_col0 = float4(color, oalpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha);
@ -987,50 +1078,17 @@ float3 ApplyDebanding(float2 frag_coord)
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#else
// Non-transparency won't enable blending so we can write the mask here regardless.
o_col0 = float4(color, oalpha);
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
}
else
{
#if USE_DUAL_SOURCE
o_col0 = float4(color, oalpha);
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#else
o_col0 = float4(color, oalpha);
#endif
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#if TRANSPARENCY_ONLY_TRANSPARENT
discard;
#endif
}
#elif TRANSPARENCY
// We shouldn't be rendering opaque geometry only when untextured, so no need to test/discard here.
#if USE_DUAL_SOURCE
o_col0 = float4(color, oalpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha);
#else
o_col0 = float4(color, oalpha);
#endif
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#else
// Non-transparency won't enable blending so we can write the mask here regardless.
o_col0 = float4(color, oalpha);
#if USE_DUAL_SOURCE
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif
#if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z;
#endif
#endif
}

View File

@ -18,7 +18,7 @@ public:
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency,
GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering,
bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing,
bool check_mask);
bool check_mask, bool use_rov, bool use_rov_depth, bool rov_depth_test);
std::string GenerateWireframeGeometryShader();
std::string GenerateWireframeFragmentShader();
std::string GenerateVRAMReadFragmentShader();

View File

@ -271,8 +271,10 @@ bool Host::CreateGPUDevice(RenderAPI api, Error* error)
disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH;
if (g_settings.gpu_disable_texture_buffers)
disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS;
if (g_settings.gpu_disable_texture_copy_to_self)
disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_COPY_TO_SELF;
if (g_settings.gpu_disable_memory_import)
disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT;
if (g_settings.gpu_disable_raster_order_views)
disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS;
Error create_error;
if (!g_gpu_device || !g_gpu_device->Create(g_settings.gpu_adapter,

View File

@ -191,6 +191,7 @@ void Settings::Load(SettingsInterface& si)
gpu_disable_texture_buffers = si.GetBoolValue("GPU", "DisableTextureBuffers", false);
gpu_disable_texture_copy_to_self = si.GetBoolValue("GPU", "DisableTextureCopyToSelf", false);
gpu_disable_memory_import = si.GetBoolValue("GPU", "DisableMemoryImport", false);
gpu_disable_raster_order_views = si.GetBoolValue("GPU", "DisableRasterOrderViews", false);
gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false);
gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true);
gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false);
@ -199,6 +200,7 @@ void Settings::Load(SettingsInterface& si)
gpu_debanding = si.GetBoolValue("GPU", "Debanding", false);
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", true);
gpu_force_round_texcoords = si.GetBoolValue("GPU", "ForceRoundTextureCoordinates", false);
gpu_accurate_blending = si.GetBoolValue("GPU", "AccurateBlending", false);
gpu_texture_filter =
ParseTextureFilterName(
si.GetStringValue("GPU", "TextureFilter", GetTextureFilterName(DEFAULT_GPU_TEXTURE_FILTER)).c_str())
@ -494,6 +496,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const
si.SetBoolValue("GPU", "DisableTextureBuffers", gpu_disable_texture_buffers);
si.SetBoolValue("GPU", "DisableTextureCopyToSelf", gpu_disable_texture_copy_to_self);
si.SetBoolValue("GPU", "DisableMemoryImport", gpu_disable_memory_import);
si.SetBoolValue("GPU", "DisableRasterOrderViews", gpu_disable_raster_order_views);
}
si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading);
@ -504,6 +507,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const
si.SetBoolValue("GPU", "Debanding", gpu_debanding);
si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering);
si.SetBoolValue("GPU", "ForceRoundTextureCoordinates", gpu_force_round_texcoords);
si.SetBoolValue("GPU", "AccurateBlending", gpu_accurate_blending);
si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter));
si.SetStringValue(
"GPU", "SpriteTextureFilter",

View File

@ -114,11 +114,13 @@ struct Settings
bool gpu_disable_texture_buffers : 1 = false;
bool gpu_disable_texture_copy_to_self : 1 = false;
bool gpu_disable_memory_import : 1 = false;
bool gpu_disable_raster_order_views : 1 = false;
bool gpu_per_sample_shading : 1 = false;
bool gpu_true_color : 1 = true;
bool gpu_debanding : 1 = false;
bool gpu_scaled_dithering : 1 = true;
bool gpu_force_round_texcoords : 1 = false;
bool gpu_accurate_blending : 1 = false;
bool gpu_disable_interlacing : 1 = true;
bool gpu_force_ntsc_timings : 1 = false;
bool gpu_widescreen_hack : 1 = false;
@ -280,6 +282,7 @@ struct Settings
bool log_to_file : 1 = false;
/// Returns true when the selected GPU renderer is the software renderer.
ALWAYS_INLINE bool IsUsingSoftwareRenderer() const
{
  const bool is_software = (GPURenderer::Software == gpu_renderer);
  return is_software;
}
/// Returns true when accurate blending is enabled and true color is disabled.
ALWAYS_INLINE bool IsUsingAccurateBlending() const
{
  // True color takes precedence: accurate blending is never reported while it is on.
  if (gpu_true_color)
    return false;

  return gpu_accurate_blending;
}
/// Returns true when at least one runahead frame is configured.
ALWAYS_INLINE bool IsRunaheadEnabled() const
{
  const bool has_runahead_frames = (0 < runahead_frames);
  return has_runahead_frames;
}
ALWAYS_INLINE PGXPMode GetPGXPMode()

View File

@ -4,4 +4,4 @@
#pragma once
#include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 17;
static constexpr u32 SHADER_CACHE_VERSION = 18;

View File

@ -3938,6 +3938,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers ||
g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self ||
g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import ||
g_settings.gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views ||
g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control))
{
// if debug device/threaded presentation change, we need to recreate the whole display
@ -3950,6 +3951,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers ||
g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self ||
g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import ||
g_settings.gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views ||
g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control);
Host::AddIconOSDMessage("RendererSwitch", ICON_FA_PAINT_ROLLER,
@ -4055,6 +4057,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_debanding != old_settings.gpu_debanding ||
g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering ||
g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords ||
g_settings.gpu_accurate_blending != old_settings.gpu_accurate_blending ||
g_settings.gpu_texture_filter != old_settings.gpu_texture_filter ||
g_settings.gpu_sprite_texture_filter != old_settings.gpu_sprite_texture_filter ||
g_settings.gpu_line_detect_mode != old_settings.gpu_line_detect_mode ||

View File

@ -138,6 +138,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
"UseSoftwareRendererForReadbacks", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.forceRoundedTexcoords, "GPU", "ForceRoundTextureCoordinates",
false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.accurateBlending, "GPU", "AccurateBlending", false);
SettingWidgetBinder::SetAvailability(m_ui.scaledDithering,
!m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering));
@ -388,6 +389,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
m_ui.forceRoundedTexcoords, tr("Round Upscaled Texture Coordinates"), tr("Unchecked"),
tr("Rounds texture coordinates instead of flooring when upscaling. Can fix misaligned textures in some games, but "
"break others, and is incompatible with texture filtering."));
dialog->registerWidgetHelp(
m_ui.accurateBlending, tr("Accurate Blending"), tr("Unchecked"),
tr("Forces blending to be done in the shader at 16-bit precision, when not using true color. Very few games "
"actually require this, and there is a <strong>non-trivial</strong> performance cost."));
// PGXP Tab
@ -520,6 +525,12 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
dialog->registerWidgetHelp(m_ui.disableTextureCopyToSelf, tr("Disable Texture Copies To Self"), tr("Unchecked"),
tr("Disables the use of self-copy updates for the VRAM texture. Useful for testing broken "
"graphics drivers. <strong>Only for developer use.</strong>"));
dialog->registerWidgetHelp(m_ui.disableMemoryImport, tr("Disable Memory Import"), tr("Unchecked"),
tr("Disables the use of host memory importing. Useful for testing broken graphics "
"drivers. <strong>Only for developer use.</strong>"));
dialog->registerWidgetHelp(m_ui.disableRasterOrderViews, tr("Disable Rasterizer Order Views"), tr("Unchecked"),
tr("Disables the use of rasterizer order views. Useful for testing broken graphics "
"drivers. <strong>Only for developer use.</strong>"));
}
GraphicsSettingsWidget::~GraphicsSettingsWidget() = default;
@ -669,6 +680,8 @@ void GraphicsSettingsWidget::updateRendererDependentOptions()
m_ui.debanding->setEnabled(is_hardware);
m_ui.scaledDithering->setEnabled(is_hardware && !m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering));
m_ui.useSoftwareRendererForReadbacks->setEnabled(is_hardware);
m_ui.forceRoundedTexcoords->setEnabled(is_hardware);
m_ui.accurateBlending->setEnabled(is_hardware);
m_ui.tabs->setTabEnabled(TAB_INDEX_TEXTURE_REPLACEMENTS, is_hardware);
@ -881,9 +894,9 @@ void GraphicsSettingsWidget::onTrueColorChanged()
const bool true_color = m_dialog->getEffectiveBoolValue("GPU", "TrueColor", false);
const bool allow_scaled_dithering =
(resolution_scale != 1 && !true_color && !m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering));
const bool allow_debanding = true_color;
m_ui.scaledDithering->setEnabled(allow_scaled_dithering);
m_ui.debanding->setEnabled(allow_debanding);
m_ui.debanding->setEnabled(true_color);
m_ui.accurateBlending->setEnabled(!true_color);
}
void GraphicsSettingsWidget::onDownsampleModeChanged()

View File

@ -392,13 +392,6 @@
</item>
<item row="2" column="0" colspan="2">
<layout class="QGridLayout" name="gridLayout_5">
<item row="1" column="0">
<widget class="QCheckBox" name="gpuThread">
<property name="text">
<string>Threaded Rendering</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="scaledDithering">
<property name="text">
@ -406,13 +399,6 @@
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="useSoftwareRendererForReadbacks">
<property name="text">
<string>Software Renderer Readbacks</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="debanding">
<property name="text">
@ -420,13 +406,34 @@
</property>
</widget>
</item>
<item row="2" column="0">
<item row="2" column="1">
<widget class="QCheckBox" name="useSoftwareRendererForReadbacks">
<property name="text">
<string>Software Renderer Readbacks</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="forceRoundedTexcoords">
<property name="text">
<string>Round Upscaled Texture Coordinates</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="gpuThread">
<property name="text">
<string>Threaded Rendering</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="accurateBlending">
<property name="text">
<string>Accurate Blending</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="0">
@ -993,13 +1000,6 @@
<layout class="QFormLayout" name="formLayout_10">
<item row="0" column="0" colspan="2">
<layout class="QGridLayout" name="gridLayout_8">
<item row="1" column="1">
<widget class="QCheckBox" name="disableFramebufferFetch">
<property name="text">
<string>Disable Framebuffer Fetch</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QCheckBox" name="disableTextureCopyToSelf">
<property name="text">
@ -1007,13 +1007,6 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="disableDualSource">
<property name="text">
<string>Disable Dual-Source Blending</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="useDebugDevice">
<property name="text">
@ -1021,10 +1014,17 @@
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="disableShaderCache">
<item row="1" column="0">
<widget class="QCheckBox" name="disableDualSource">
<property name="text">
<string>Disable Shader Cache</string>
<string>Disable Dual-Source Blending</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="disableFramebufferFetch">
<property name="text">
<string>Disable Framebuffer Fetch</string>
</property>
</widget>
</item>
@ -1035,6 +1035,27 @@
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="disableShaderCache">
<property name="text">
<string>Disable Shader Cache</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="disableMemoryImport">
<property name="text">
<string>Disable Memory Import</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QCheckBox" name="disableRasterOrderViews">
<property name="text">
<string>Disable Rasterizer Order Views</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>

View File

@ -149,7 +149,7 @@ TinyString ShaderGen::GetGLSLVersionString(RenderAPI render_api, u32 version)
}
#endif
void ShaderGen::WriteHeader(std::stringstream& ss)
void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */)
{
if (m_shader_language == GPUShaderLanguage::GLSL || m_shader_language == GPUShaderLanguage::GLSLES)
ss << m_glsl_version_string << "\n\n";
@ -211,6 +211,11 @@ void ShaderGen::WriteHeader(std::stringstream& ss)
if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_ES_VERSION_3_1 && GLAD_GL_ARB_shader_storage_buffer_object)
ss << "#extension GL_ARB_shader_storage_buffer_object : require\n";
}
else if (m_shader_language == GPUShaderLanguage::GLSLVK)
{
if (enable_rov)
ss << "#extension GL_ARB_fragment_shader_interlock : require\n";
}
#endif
DefineMacro(ss, "API_OPENGL", m_render_api == RenderAPI::OpenGL);
@ -413,6 +418,27 @@ void ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* name, u3
}
}
// Emits the declaration of a read/write 2D image into the generated shader source.
// GLSL output is a `restrict coherent` storage image; HLSL output is a RasterizerOrderedTexture2D.
//   ss          - stream receiving the shader source.
//   name        - identifier of the image variable in the generated shader.
//   index       - binding slot (GLSL `binding`, HLSL `u` register).
//   is_float    - HLSL only: selects `float4` instead of `unorm float4` for non-integer images.
//   is_int      - selects an integer image.
//   is_unsigned - with is_int, selects the unsigned variant instead of the signed one.
// NOTE(review): is_float does not influence the GLSL output, which always uses the rgba8 format for
// non-integer images - confirm that this is intended.
void ShaderGen::DeclareImage(std::stringstream& ss, const char* name, u32 index, bool is_float /* = false */,
                             bool is_int /* = false */, bool is_unsigned /* = false */)
{
  if (m_glsl)
  {
    // GLSL requires the format layout qualifier to match the image variable type, so an integer format
    // must be paired with iimage2D/uimage2D; pairing it with image2D is a compile-time error.
    const char* const format = is_int ? (is_unsigned ? "rgba8ui" : "rgba8i") : "rgba8";
    const char* const image_type = is_int ? (is_unsigned ? "uimage2D" : "iimage2D") : "image2D";

    // With SPIR-V the descriptor set has to be named explicitly; the set index used for images is
    // one higher when a uniform buffer is in use.
    if (m_spirv)
      ss << "layout(set = " << (m_has_uniform_buffer ? 2 : 1) << ", binding = " << index;
    else
      ss << "layout(binding = " << index;

    ss << ", " << format << ") "
       << "uniform restrict coherent " << image_type << " " << name << ";\n";
  }
  else
  {
    // HLSL: the element type carries the int/uint/float/unorm distinction directly.
    const char* const element_type =
      is_int ? (is_unsigned ? "uint4" : "int4") : (is_float ? "float4" : "unorm float4");

    ss << "RasterizerOrderedTexture2D<" << element_type << "> " << name << " : register(u" << index << ");\n";
  }
}
const char* ShaderGen::GetInterpolationQualifier(bool interface_block, bool centroid_interpolation,
bool sample_interpolation, bool is_out) const
{
@ -545,7 +571,8 @@ void ShaderGen::DeclareFragmentEntryPoint(
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs /* = */,
bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool dual_source_output /* = false */,
bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */,
bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */)
bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */,
bool rov /* = false */)
{
if (m_glsl)
{
@ -603,6 +630,8 @@ void ShaderGen::DeclareFragmentEntryPoint(
if (feedback_loop)
{
Assert(!rov);
#ifdef ENABLE_OPENGL
if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES)
{
@ -647,6 +676,14 @@ void ShaderGen::DeclareFragmentEntryPoint(
}
#endif
}
else if (rov)
{
ss << "layout(pixel_interlock_ordered) in;\n";
ss << "#define ROV_LOAD(name, coords) imageLoad(name, ivec2(coords))\n";
ss << "#define ROV_STORE(name, coords, value) imageStore(name, ivec2(coords), value)\n";
ss << "#define BEGIN_ROV_REGION beginInvocationInterlockARB()\n";
ss << "#define END_ROV_REGION endInvocationInterlockARB()\n";
}
if (m_use_glsl_binding_layout)
{
@ -679,48 +716,64 @@ void ShaderGen::DeclareFragmentEntryPoint(
}
else
{
if (rov)
{
ss << "#define ROV_LOAD(name, coords) name[uint2(coords)]\n";
ss << "#define ROV_STORE(name, coords, value) name[uint2(coords)] = value\n";
ss << "#define BEGIN_ROV_REGION\n";
ss << "#define END_ROV_REGION\n";
}
const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, false);
ss << "void main(\n";
bool first = true;
for (u32 i = 0; i < num_color_inputs; i++)
ss << " " << qualifier << (noperspective_color ? "noperspective " : "") << "in float4 v_col" << i << " : COLOR"
<< i << ",\n";
{
ss << (first ? "" : ",\n") << " " << qualifier << (noperspective_color ? "noperspective " : "")
<< "in float4 v_col" << i << " : COLOR" << i;
first = false;
}
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
{
ss << (first ? "" : ",\n") << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i;
first = false;
}
u32 additional_counter = num_texcoord_inputs;
for (const auto& [qualifiers, name] : additional_inputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n";
ss << (first ? "" : ",\n") << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter;
additional_counter++;
first = false;
}
if (declare_fragcoord)
ss << " in float4 v_pos : SV_Position,\n";
{
ss << (first ? "" : ",\n") << " in float4 v_pos : SV_Position";
first = false;
}
if (declare_sample_id)
ss << " in uint f_sample_index : SV_SampleIndex,\n";
{
ss << (first ? "" : ",\n") << " in uint f_sample_index : SV_SampleIndex";
first = false;
}
if (depth_output)
{
ss << " out float o_depth : SV_Depth";
if (num_color_outputs > 0)
ss << ",\n";
else
ss << ")\n";
ss << (first ? "" : ",\n") << " out float o_depth : SV_Depth";
first = false;
}
for (u32 i = 0; i < num_color_outputs; i++)
{
ss << " out float4 o_col" << i << " : SV_Target" << i;
if (i == (num_color_outputs - 1))
ss << ")\n";
else
ss << ",\n";
ss << (first ? "" : ",\n") << " out float4 o_col" << i << " : SV_Target" << i;
first = false;
}
ss << ")";
}
}

View File

@ -44,13 +44,15 @@ protected:
void DefineMacro(std::stringstream& ss, const char* name, bool enabled);
void DefineMacro(std::stringstream& ss, const char* name, s32 value);
void WriteHeader(std::stringstream& ss);
void WriteHeader(std::stringstream& ss, bool enable_rov = false);
void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan);
void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
bool push_constant_on_vulkan);
void DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled = false,
bool is_int = false, bool is_unsigned = false);
void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned);
// Writes the declaration of a 2D image called `name`, bound at slot `index`, to `ss`.
// is_int selects an integer element format (unsigned when is_unsigned is also set); is_float selects
// float4 rather than unorm float4 in the HLSL output. With all flags false, an 8-bit unorm RGBA image
// is declared.
void DeclareImage(std::stringstream& ss, const char* name, u32 index, bool is_float = false, bool is_int = false,
bool is_unsigned = false);
void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list<const char*>& attributes,
u32 num_color_outputs, u32 num_texcoord_outputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_outputs,
@ -62,7 +64,7 @@ protected:
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool dual_source_output = false,
bool depth_output = false, bool msaa = false, bool ssaa = false,
bool declare_sample_id = false, bool noperspective_color = false,
bool feedback_loop = false);
bool feedback_loop = false, bool rov = false);
RenderAPI m_render_api;
GPUShaderLanguage m_shader_language;