GPU/HW: Make batch shaders independent of resolution

Almost a 6x reduction in *compressed* shader cache size.
This commit is contained in:
Stenzek 2024-09-22 13:45:01 +10:00
parent f9941c3f68
commit 986e207cff
No known key found for this signature in database
3 changed files with 19 additions and 23 deletions

View File

@ -297,8 +297,6 @@ void GPU_HW::Reset(bool clear_vram)
m_sw_renderer->Reset(); m_sw_renderer->Reset();
m_batch = {}; m_batch = {};
m_batch_ubo_data = {};
m_batch_ubo_dirty = true;
m_current_depth = 1; m_current_depth = 1;
SetClampedDrawingArea(); SetClampedDrawingArea();
@ -862,6 +860,11 @@ bool GPU_HW::CreateBuffers()
INFO_LOG("Created HW framebuffer of {}x{}", texture_width, texture_height); INFO_LOG("Created HW framebuffer of {}x{}", texture_width, texture_height);
m_batch_ubo_data.u_resolution_scale = static_cast<float>(m_resolution_scale);
m_batch_ubo_data.u_rcp_resolution_scale = 1.0f / m_batch_ubo_data.u_resolution_scale;
m_batch_ubo_data.u_resolution_scale_minus_one = m_batch_ubo_data.u_resolution_scale - 1.0f;
m_batch_ubo_dirty = true;
SetVRAMRenderTarget(); SetVRAMRenderTarget();
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
return true; return true;

View File

@ -127,6 +127,10 @@ private:
float u_dst_alpha_factor; float u_dst_alpha_factor;
u32 u_interlaced_displayed_field; u32 u_interlaced_displayed_field;
u32 u_set_mask_while_drawing; u32 u_set_mask_while_drawing;
float u_resolution_scale;
float u_rcp_resolution_scale;
float u_resolution_scale_minus_one;
u32 pad;
}; };
struct RendererStats struct RendererStats

View File

@ -50,7 +50,8 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_texture_window_and", "uint2 u_texture_window_or", "float u_src_alpha_factor", {"uint2 u_texture_window_and", "uint2 u_texture_window_or", "float u_src_alpha_factor",
"float u_dst_alpha_factor", "uint u_interlaced_displayed_field", "float u_dst_alpha_factor", "uint u_interlaced_displayed_field",
"bool u_set_mask_while_drawing"}, "bool u_set_mask_while_drawing", "float u_resolution_scale", "float u_rcp_resolution_scale",
"float u_resolution_scale_minus_one"},
false); false);
} }
@ -66,8 +67,6 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth);
DefineMacro(ss, "UPSCALED", m_resolution_scale > 1); DefineMacro(ss, "UPSCALED", m_resolution_scale > 1);
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
if (textured) if (textured)
@ -129,7 +128,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
#if TEXTURED #if TEXTURED
v_tex0 = float2(uint2(a_texcoord & 0xFFFFu, a_texcoord >> 16)); v_tex0 = float2(uint2(a_texcoord & 0xFFFFu, a_texcoord >> 16));
#if !PALETTE #if !PALETTE
v_tex0 *= float(RESOLUTION_SCALE); v_tex0 *= u_resolution_scale;
#endif #endif
// base_x,base_y,palette_x,palette_y // base_x,base_y,palette_x,palette_y
@ -151,8 +150,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
// Treat coordinates as being in upscaled space, and extend the UV range to all "upscaled" // Treat coordinates as being in upscaled space, and extend the UV range to all "upscaled"
// pixels. This means 1-pixel-high polygon-based framebuffer effects won't be downsampled. // pixels. This means 1-pixel-high polygon-based framebuffer effects won't be downsampled.
// (e.g. Mega Man Legends 2 haze effect) // (e.g. Mega Man Legends 2 haze effect)
v_uv_limits *= float(RESOLUTION_SCALE); v_uv_limits *= u_resolution_scale;
v_uv_limits.zw += float(RESOLUTION_SCALE - 1u); v_uv_limits.zw += u_resolution_scale_minus_one;
#endif #endif
#endif #endif
#endif #endif
@ -743,7 +742,6 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(
// Used for converting to normalized coordinates for sampling. // Used for converting to normalized coordinates for sampling.
ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0 / float(" << VRAM_WIDTH << "), 1.0 / float(" << VRAM_HEIGHT ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0 / float(" << VRAM_WIDTH << "), 1.0 / float(" << VRAM_HEIGHT
<< "));\n"; << "));\n";
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
WriteColorConversionFunctions(ss); WriteColorConversionFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
@ -774,18 +772,13 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(
ss << R"( ss << R"(
uint3 ApplyDithering(uint2 coord, uint3 icol) uint3 ApplyDithering(uint2 coord, uint3 icol)
{ {
#if DITHERING_SCALED #if (DITHERING_SCALED != 0 || UPSCALED == 0)
uint2 fc = coord & uint2(3u, 3u); uint2 fc = coord & uint2(3u, 3u);
#else #else
uint2 fc = (coord / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & uint2(3u, 3u); uint2 fc = uint2(float2(coord) * u_rcp_resolution_scale) & uint2(3u, 3u);
#endif #endif
int offset = s_dither_values[fc.y * 4u + fc.x]; int offset = s_dither_values[fc.y * 4u + fc.x];
return uint3(clamp((int3(icol) + offset) >> 3, 0, 31));
#if !TRUE_COLOR
return uint3(clamp((int3(icol) + int3(offset, offset, offset)) >> 3, 0, 31));
#else
return uint3(clamp(int3(icol) + int3(offset, offset, offset), 0, 255));
#endif
} }
#if TEXTURED #if TEXTURED
@ -852,7 +845,7 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
// Coordinates are already upscaled, we need to downscale them to apply the texture // Coordinates are already upscaled, we need to downscale them to apply the texture
// window, then re-upscale/offset. We can't round here, because it could result in // window, then re-upscale/offset. We can't round here, because it could result in
// going outside of the texture window. // going outside of the texture window.
float2 ncoords = coords / float(RESOLUTION_SCALE); float2 ncoords = coords * u_rcp_resolution_scale;
float2 nfpart = frac(ncoords); float2 nfpart = frac(ncoords);
uint2 nicoord = ApplyTextureWindow(uint2(floor(ncoords))); uint2 nicoord = ApplyTextureWindow(uint2(floor(ncoords)));
uint2 nvicoord = (texpage.xy + nicoord) & uint2(1023, 511); uint2 nvicoord = (texpage.xy + nicoord) & uint2(1023, 511);
@ -942,12 +935,8 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
#else #else
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)); icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0));
icolor = (icolor * vertcol) >> 7; icolor = (icolor * vertcol) >> 7;
#if DITHERING
icolor = ApplyDithering(fragpos, icolor);
#else
icolor = min(icolor, uint3(255u, 255u, 255u)); icolor = min(icolor, uint3(255u, 255u, 255u));
#endif #endif
#endif
// Compute output alpha (mask bit) // Compute output alpha (mask bit)
oalpha = float(u_set_mask_while_drawing ? 1 : int(semitransparent)); oalpha = float(u_set_mask_while_drawing ? 1 : int(semitransparent));