GPU/HW: Reduce shader dependence on global constants

Step 1 of making resolution-independent shaders.
This commit is contained in:
Stenzek 2024-09-22 13:19:17 +10:00
parent f169b892c1
commit f9941c3f68
No known key found for this signature in database
2 changed files with 46 additions and 37 deletions

View File

@ -21,14 +21,8 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u
GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default; GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss) void GPU_HW_ShaderGen::WriteColorConversionFunctions(std::stringstream& ss)
{ {
DefineMacro(ss, "MULTISAMPLING", UsingMSAA());
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT uint MULTISAMPLES = " << m_multisamples << "u;\n";
ss << "CONSTANT bool PER_SAMPLE_SHADING = " << (m_per_sample_shading ? "true" : "false") << ";\n";
ss << R"( ss << R"(
uint RGBA8ToRGBA5551(float4 v) uint RGBA8ToRGBA5551(float4 v)
{ {
@ -70,8 +64,10 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
DefineMacro(ss, "UV_LIMITS", uv_limits); DefineMacro(ss, "UV_LIMITS", uv_limits);
DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords);
DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth);
DefineMacro(ss, "UPSCALED", m_resolution_scale > 1);
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
if (textured) if (textured)
@ -102,7 +98,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale
// Offset the vertex position by 0.5 to ensure correct interpolation of texture coordinates // Offset the vertex position by 0.5 to ensure correct interpolation of texture coordinates
// at 1x resolution scale. This doesn't work at >1x, we adjust the texture coordinates before // at 1x resolution scale. This doesn't work at >1x, we adjust the texture coordinates before
// uploading there instead. // uploading there instead.
float vertex_offset = (RESOLUTION_SCALE == 1u) ? 0.5 : 0.0; float vertex_offset = (UPSCALED == 0) ? 0.5 : 0.0;
// 0..+1023 -> -1..1 // 0..+1023 -> -1..1
float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0; float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0;
@ -744,7 +740,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(
DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords);
DefineMacro(ss, "UPSCALED", m_resolution_scale > 1); DefineMacro(ss, "UPSCALED", m_resolution_scale > 1);
WriteCommonFunctions(ss); // Used for converting to normalized coordinates for sampling.
ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0 / float(" << VRAM_WIDTH << "), 1.0 / float(" << VRAM_HEIGHT
<< "));\n";
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
WriteColorConversionFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0); DeclareTexture(ss, "samp0", 0);
@ -807,7 +808,7 @@ uint2 FloatToIntegerCoords(float2 coords)
{ {
// With the vertex offset applied at 1x resolution scale, we want to round the texture coordinates. // With the vertex offset applied at 1x resolution scale, we want to round the texture coordinates.
// Floor them otherwise, as it currently breaks when upscaling as the vertex offset is not applied. // Floor them otherwise, as it currently breaks when upscaling as the vertex offset is not applied.
return uint2((RESOLUTION_SCALE == 1u || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords)); return uint2((UPSCALED == 0 || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords));
} }
float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
@ -825,7 +826,7 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
#endif #endif
// load colour/palette // load colour/palette
float4 texel = LOAD_TEXTURE(samp0, int2(vicoord * RESOLUTION_SCALE), 0); float4 texel = SAMPLE_TEXTURE_LEVEL(samp0, float2(vicoord) * RCP_VRAM_SIZE, 0);
uint vram_value = RGBA8ToRGBA5551(texel); uint vram_value = RGBA8ToRGBA5551(texel);
// apply palette // apply palette
@ -840,13 +841,13 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
uint2 palette_icoord = uint2(((texpage.z + palette_index) & 0x3FFu), texpage.w); uint2 palette_icoord = uint2(((texpage.z + palette_index) & 0x3FFu), texpage.w);
#endif #endif
return LOAD_TEXTURE(samp0, int2(palette_icoord * RESOLUTION_SCALE), 0); return SAMPLE_TEXTURE_LEVEL(samp0, float2(palette_icoord) * RCP_VRAM_SIZE, 0);
#else #else
// Direct texturing - usually render-to-texture effects. // Direct texturing - usually render-to-texture effects.
uint2 vicoord;
#if !UPSCALED #if !UPSCALED
uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
vicoord = (texpage.xy + icoord) & uint2(1023, 511); uint2 vicoord = (texpage.xy + icoord) & uint2(1023, 511);
return LOAD_TEXTURE(samp0, int2(vicoord), 0);
#else #else
// Coordinates are already upscaled, we need to downscale them to apply the texture // Coordinates are already upscaled, we need to downscale them to apply the texture
// window, then re-upscale/offset. We can't round here, because it could result in // window, then re-upscale/offset. We can't round here, because it could result in
@ -855,11 +856,9 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
float2 nfpart = frac(ncoords); float2 nfpart = frac(ncoords);
uint2 nicoord = ApplyTextureWindow(uint2(floor(ncoords))); uint2 nicoord = ApplyTextureWindow(uint2(floor(ncoords)));
uint2 nvicoord = (texpage.xy + nicoord) & uint2(1023, 511); uint2 nvicoord = (texpage.xy + nicoord) & uint2(1023, 511);
coords = (float2(nvicoord) + nfpart) * float(RESOLUTION_SCALE); ncoords = (float2(nvicoord) + nfpart);
vicoord = uint2(floor(coords)); return SAMPLE_TEXTURE_LEVEL(samp0, ncoords * RCP_VRAM_SIZE, 0);
#endif #endif
return LOAD_TEXTURE(samp0, int2(vicoord), 0);
#endif #endif
} }
@ -1133,11 +1132,15 @@ std::string GPU_HW_ShaderGen::GenerateVRAMExtractFragmentShader(bool color_24bit
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteColorConversionFunctions(ss);
DefineMacro(ss, "COLOR_24BIT", color_24bit); DefineMacro(ss, "COLOR_24BIT", color_24bit);
DefineMacro(ss, "DEPTH_BUFFER", depth_buffer); DefineMacro(ss, "DEPTH_BUFFER", depth_buffer);
DefineMacro(ss, "MULTISAMPLED", UsingMSAA()); DefineMacro(ss, "MULTISAMPLING", UsingMSAA());
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT uint MULTISAMPLES = " << m_multisamples << "u;\n";
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_skip_x", "uint u_line_skip"}, true); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_skip_x", "uint u_line_skip"}, true);
DeclareTexture(ss, "samp0", 0, UsingMSAA()); DeclareTexture(ss, "samp0", 0, UsingMSAA());
if (depth_buffer) if (depth_buffer)
@ -1217,7 +1220,6 @@ std::string GPU_HW_ShaderGen::GenerateWireframeGeometryShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
if (m_glsl) if (m_glsl)
{ {
@ -1291,7 +1293,6 @@ std::string GPU_HW_ShaderGen::GenerateWireframeFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareFragmentEntryPoint(ss, 0, 0); DeclareFragmentEntryPoint(ss, 0, 0);
ss << R"( ss << R"(
@ -1307,9 +1308,13 @@ std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteColorConversionFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size"}, true);
DefineMacro(ss, "MULTISAMPLING", UsingMSAA());
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint MULTISAMPLES = " << m_multisamples << "u;\n";
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size"}, true);
DeclareTexture(ss, "samp0", 0, UsingMSAA()); DeclareTexture(ss, "samp0", 0, UsingMSAA());
ss << R"( ss << R"(
@ -1366,9 +1371,14 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteColorConversionFunctions(ss);
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "USE_BUFFER", use_buffer); DefineMacro(ss, "USE_BUFFER", use_buffer);
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ");\n";
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", {"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset",
"uint u_mask_or_bits", "float u_depth_value"}, "uint u_mask_or_bits", "float u_depth_value"},
@ -1414,8 +1424,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
// find offset from the start of the row/column // find offset from the start of the row/column
uint2 offset; uint2 offset;
offset.x = (coords.x < u_base_coords.x) ? ((VRAM_SIZE.x / RESOLUTION_SCALE) - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x); offset.x = (coords.x < u_base_coords.x) ? (VRAM_SIZE.x - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x);
offset.y = (coords.y < u_base_coords.y) ? ((VRAM_SIZE.y / RESOLUTION_SCALE) - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y); offset.y = (coords.y < u_base_coords.y) ? (VRAM_SIZE.y - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y);
#if !USE_BUFFER #if !USE_BUFFER
uint value = LOAD_TEXTURE(samp0, int2(offset), 0).x; uint value = LOAD_TEXTURE(samp0, int2(offset), 0).x;
@ -1440,15 +1450,18 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "MSAA_COPY", msaa);
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size",
"bool u_set_mask_bit", "float u_depth_value"}, "bool u_set_mask_bit", "float u_depth_value"},
true); true);
DeclareTexture(ss, "samp0", 0, msaa); DeclareTexture(ss, "samp0", 0, msaa);
DefineMacro(ss, "MSAA_COPY", msaa);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, m_write_mask_as_depth, false, false, msaa); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, m_write_mask_as_depth, false, false, msaa);
ss << R"( ss << R"(
{ {
@ -1488,7 +1501,6 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "WRAPPED", wrapped); DefineMacro(ss, "WRAPPED", wrapped);
DefineMacro(ss, "INTERLACED", interlaced); DefineMacro(ss, "INTERLACED", interlaced);
@ -1530,7 +1542,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); DefineMacro(ss, "MULTISAMPLING", UsingMSAA());
DeclareTexture(ss, "samp0", 0, UsingMSAA()); DeclareTexture(ss, "samp0", 0, UsingMSAA());
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, false, true, false, false, UsingMSAA()); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, false, true, false, false, UsingMSAA());
@ -1575,7 +1587,6 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool f
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss); WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DefineMacro(ss, "FIRST_PASS", first_pass); DefineMacro(ss, "FIRST_PASS", first_pass);
@ -1634,7 +1645,7 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteColorConversionFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss); WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
@ -1667,7 +1678,6 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleCompositeFragmentShader(
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DeclareTexture(ss, "samp1", 1, false); DeclareTexture(ss, "samp1", 1, false);
@ -1689,7 +1699,6 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_base_coords"}, true); DeclareUniformBuffer(ss, {"uint2 u_base_coords"}, true);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);

View File

@ -40,7 +40,7 @@ private:
ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; } ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; }
ALWAYS_INLINE bool UsingPerSampleShading() const { return m_multisamples > 1 && m_per_sample_shading; } ALWAYS_INLINE bool UsingPerSampleShading() const { return m_multisamples > 1 && m_per_sample_shading; }
void WriteCommonFunctions(std::stringstream& ss); void WriteColorConversionFunctions(std::stringstream& ss);
void WriteBatchUniformBuffer(std::stringstream& ss); void WriteBatchUniformBuffer(std::stringstream& ss);
void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter); void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter);
void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss); void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss);