GPU/HW: Eliminate raw texture pipeline permutations

This commit is contained in:
Stenzek 2024-06-16 16:58:50 +10:00
parent 51648b0714
commit c1792108f4
No known key found for this signature in database
5 changed files with 48 additions and 66 deletions

View File

@ -784,19 +784,21 @@ bool GPU_HW::CompilePipelines()
m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch, m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch,
m_debanding); m_debanding);
const u32 total_pipelines = 2 + // vertex shaders constexpr u32 active_texture_modes = 4;
(5 * 5 * 9 * 2 * 2 * 2) + // fragment shaders const u32 total_pipelines =
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * 9 * 2 * 2 * 2) + // batch pipelines 2 + // vertex shaders
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe (active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders
1 + // fullscreen quad VS ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines
(2 * 2) + // vram fill ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
(1 + BoolToUInt32(write_mask_as_depth)) + // vram copy 1 + // fullscreen quad VS
(1 + BoolToUInt32(write_mask_as_depth)) + // vram write (2 * 2) + // vram fill
1 + // vram write replacement (1 + BoolToUInt32(write_mask_as_depth)) + // vram copy
(needs_depth_buffer ? 1 : 0) + // mask -> depth (1 + BoolToUInt32(write_mask_as_depth)) + // vram write
1 + // vram read 1 + // vram write replacement
2 + // extract/display (needs_depth_buffer ? 1 : 0) + // mask -> depth
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample 1 + // vram read
2 + // extract/display
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines); ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
@ -804,7 +806,7 @@ bool GPU_HW::CompilePipelines()
// fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); }; static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, 9, 5, 5> batch_fragment_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{};
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(destroy_shader); batch_vertex_shaders.enumerate(destroy_shader);
batch_fragment_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader);
@ -835,11 +837,11 @@ bool GPU_HW::CompilePipelines()
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) || (m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent)))) render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
{ {
progress.Increment(9 * 2 * 2 * 2); progress.Increment(4 * 2 * 2 * 2);
continue; continue;
} }
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
{ {
for (u8 check_mask = 0; check_mask < 2; check_mask++) for (u8 check_mask = 0; check_mask < 2; check_mask++)
{ {
@ -926,7 +928,7 @@ bool GPU_HW::CompilePipelines()
continue; continue;
} }
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
{ {
for (u8 dithering = 0; dithering < 2; dithering++) for (u8 dithering = 0; dithering < 2; dithering++)
{ {
@ -1954,11 +1956,11 @@ void GPU_HW::LoadVertices()
{ {
case GPUPrimitive::Polygon: case GPUPrimitive::Polygon:
{ {
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable; const bool textured = rc.texture_enable;
const bool raw_texture = textured && rc.raw_texture_enable;
const bool shaded = rc.shading_enable;
const bool pgxp = g_settings.gpu_pgxp_enable; const bool pgxp = g_settings.gpu_pgxp_enable;
const u32 first_color = rc.color_for_first_vertex;
const u32 num_vertices = rc.quad_polygon ? 4 : 3; const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices; std::array<BatchVertex, 4> vertices;
std::array<std::array<s32, 2>, 4> native_vertex_positions; std::array<std::array<s32, 2>, 4> native_vertex_positions;
@ -1966,7 +1968,8 @@ void GPU_HW::LoadVertices()
bool valid_w = g_settings.gpu_pgxp_texture_correction; bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color;
const u64 maddr_and_pos = m_fifo.Pop(); const u64 maddr_and_pos = m_fifo.Pop();
const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0; const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
@ -2123,7 +2126,7 @@ void GPU_HW::LoadVertices()
case GPUPrimitive::Rectangle: case GPUPrimitive::Rectangle:
{ {
const u32 color = rc.color_for_first_vertex; const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex;
const GPUVertexPosition vp{FifoPop()}; const GPUVertexPosition vp{FifoPop()};
const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
@ -2967,7 +2970,7 @@ void GPU_HW::DispatchRenderCommand()
{ {
const GPURenderCommand rc{m_render_command.bits}; const GPURenderCommand rc{m_render_command.bits};
GPUTextureMode texture_mode; GPUTextureMode texture_mode = GPUTextureMode::Disabled;
if (rc.IsTexturingEnabled()) if (rc.IsTexturingEnabled())
{ {
// texture page changed - check that the new page doesn't intersect the drawing area // texture page changed - check that the new page doesn't intersect the drawing area
@ -3027,16 +3030,9 @@ void GPU_HW::DispatchRenderCommand()
} }
} }
texture_mode = m_draw_mode.mode_reg.texture_mode; texture_mode = (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit2) ?
if (rc.raw_texture_enable) GPUTextureMode::Direct16Bit :
{ m_draw_mode.mode_reg.texture_mode;
texture_mode =
static_cast<GPUTextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(GPUTextureMode::RawTextureBit));
}
}
else
{
texture_mode = GPUTextureMode::Disabled;
} }
// has any state changed which requires a new batch? // has any state changed which requires a new batch?

View File

@ -57,7 +57,8 @@ private:
{ {
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2, MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u) (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u),
NUM_TEXTURE_MODES = 4,
}; };
enum : u8 enum : u8
{ {

View File

@ -638,8 +638,6 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
// TODO: don't write depth for shader blend // TODO: don't write depth for shader blend
DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend); DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit;
const bool textured = (texture_mode != GPUTextureMode::Disabled); const bool textured = (texture_mode != GPUTextureMode::Disabled);
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend && const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
(transparency != GPUTransparencyMode::Disabled || check_mask)); (transparency != GPUTransparencyMode::Disabled || check_mask));
@ -658,10 +656,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "CHECK_MASK_BIT", check_mask); DefineMacro(ss, "CHECK_MASK_BIT", check_mask);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", DefineMacro(ss, "PALETTE",
actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); texture_mode == GPUTextureMode::Palette4Bit || texture_mode == GPUTextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPUTextureMode::Palette4Bit); DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPUTextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPUTextureMode::Palette8Bit); DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPUTextureMode::Palette8Bit);
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering); DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
// Debanding requires true color to work correctly. // Debanding requires true color to work correctly.
@ -873,23 +870,19 @@ float3 ApplyDebanding(float2 frag_coord)
// If not using true color, truncate the framebuffer colors to 5-bit. // If not using true color, truncate the framebuffer colors to 5-bit.
#if !TRUE_COLOR #if !TRUE_COLOR
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3; icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
#if !RAW_TEXTURE icolor = (icolor * vertcol) >> 4;
icolor = (icolor * vertcol) >> 4; #if DITHERING
#if DITHERING icolor = ApplyDithering(uint2(v_pos.xy), icolor);
icolor = ApplyDithering(uint2(v_pos.xy), icolor); #else
#else icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
#endif
#endif #endif
#else #else
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy)); icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy));
#if !RAW_TEXTURE icolor = (icolor * vertcol) >> 7;
icolor = (icolor * vertcol) >> 7; #if DITHERING
#if DITHERING icolor = ApplyDithering(uint2(v_pos.xy), icolor);
icolor = ApplyDithering(uint2(v_pos.xy), icolor); #else
#else icolor = min(icolor, uint3(255u, 255u, 255u));
icolor = min(icolor, uint3(255u, 255u, 255u));
#endif
#endif #endif
#endif #endif

View File

@ -53,16 +53,8 @@ enum class GPUTextureMode : u8
Palette4Bit = 0, Palette4Bit = 0,
Palette8Bit = 1, Palette8Bit = 1,
Direct16Bit = 2, Direct16Bit = 2,
Reserved_Direct16Bit = 3, Reserved_Direct16Bit2 = 3, // Not used.
Disabled = 3 // Not a register value
// Not register values.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
}; };
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode); IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);
@ -110,7 +102,7 @@ union GPURenderCommand
BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
BitField<u32, bool, 27, 1> quad_polygon; // only for polygons BitField<u32, bool, 27, 1> quad_polygon; // only for polygons
BitField<u32, bool, 27, 1> polyline; // only for lines BitField<u32, bool, 27, 1> polyline; // only for lines
BitField<u32, bool, 28, 1> shading_enable; // 0 - flat, 1 = gouroud BitField<u32, bool, 28, 1> shading_enable; // 0 - flat, 1 = gouraud
BitField<u32, GPUPrimitive, 29, 21> primitive; BitField<u32, GPUPrimitive, 29, 21> primitive;
/// Returns true if texturing should be enabled. Depends on the primitive type. /// Returns true if texturing should be enabled. Depends on the primitive type.

View File

@ -4,4 +4,4 @@
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 15; static constexpr u32 SHADER_CACHE_VERSION = 16;