GPU/HW: Eliminate raw texture pipeline permutations
This commit is contained in:
parent
51648b0714
commit
c1792108f4
|
@ -784,19 +784,21 @@ bool GPU_HW::CompilePipelines()
|
||||||
m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch,
|
m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch,
|
||||||
m_debanding);
|
m_debanding);
|
||||||
|
|
||||||
const u32 total_pipelines = 2 + // vertex shaders
|
constexpr u32 active_texture_modes = 4;
|
||||||
(5 * 5 * 9 * 2 * 2 * 2) + // fragment shaders
|
const u32 total_pipelines =
|
||||||
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * 9 * 2 * 2 * 2) + // batch pipelines
|
2 + // vertex shaders
|
||||||
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
|
(active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders
|
||||||
1 + // fullscreen quad VS
|
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines
|
||||||
(2 * 2) + // vram fill
|
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
|
||||||
(1 + BoolToUInt32(write_mask_as_depth)) + // vram copy
|
1 + // fullscreen quad VS
|
||||||
(1 + BoolToUInt32(write_mask_as_depth)) + // vram write
|
(2 * 2) + // vram fill
|
||||||
1 + // vram write replacement
|
(1 + BoolToUInt32(write_mask_as_depth)) + // vram copy
|
||||||
(needs_depth_buffer ? 1 : 0) + // mask -> depth
|
(1 + BoolToUInt32(write_mask_as_depth)) + // vram write
|
||||||
1 + // vram read
|
1 + // vram write replacement
|
||||||
2 + // extract/display
|
(needs_depth_buffer ? 1 : 0) + // mask -> depth
|
||||||
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
|
1 + // vram read
|
||||||
|
2 + // extract/display
|
||||||
|
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
|
||||||
|
|
||||||
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
|
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
|
||||||
|
|
||||||
|
@ -804,7 +806,7 @@ bool GPU_HW::CompilePipelines()
|
||||||
// fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
|
// fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
|
||||||
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
|
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
|
||||||
DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{};
|
DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{};
|
||||||
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, 9, 5, 5> batch_fragment_shaders{};
|
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{};
|
||||||
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
|
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
|
||||||
batch_vertex_shaders.enumerate(destroy_shader);
|
batch_vertex_shaders.enumerate(destroy_shader);
|
||||||
batch_fragment_shaders.enumerate(destroy_shader);
|
batch_fragment_shaders.enumerate(destroy_shader);
|
||||||
|
@ -835,11 +837,11 @@ bool GPU_HW::CompilePipelines()
|
||||||
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
|
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
|
||||||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
|
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
|
||||||
{
|
{
|
||||||
progress.Increment(9 * 2 * 2 * 2);
|
progress.Increment(4 * 2 * 2 * 2);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
|
for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
|
||||||
{
|
{
|
||||||
for (u8 check_mask = 0; check_mask < 2; check_mask++)
|
for (u8 check_mask = 0; check_mask < 2; check_mask++)
|
||||||
{
|
{
|
||||||
|
@ -926,7 +928,7 @@ bool GPU_HW::CompilePipelines()
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
|
for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
|
||||||
{
|
{
|
||||||
for (u8 dithering = 0; dithering < 2; dithering++)
|
for (u8 dithering = 0; dithering < 2; dithering++)
|
||||||
{
|
{
|
||||||
|
@ -1954,11 +1956,11 @@ void GPU_HW::LoadVertices()
|
||||||
{
|
{
|
||||||
case GPUPrimitive::Polygon:
|
case GPUPrimitive::Polygon:
|
||||||
{
|
{
|
||||||
const u32 first_color = rc.color_for_first_vertex;
|
|
||||||
const bool shaded = rc.shading_enable;
|
|
||||||
const bool textured = rc.texture_enable;
|
const bool textured = rc.texture_enable;
|
||||||
|
const bool raw_texture = textured && rc.raw_texture_enable;
|
||||||
|
const bool shaded = rc.shading_enable;
|
||||||
const bool pgxp = g_settings.gpu_pgxp_enable;
|
const bool pgxp = g_settings.gpu_pgxp_enable;
|
||||||
|
const u32 first_color = rc.color_for_first_vertex;
|
||||||
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
|
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
|
||||||
std::array<BatchVertex, 4> vertices;
|
std::array<BatchVertex, 4> vertices;
|
||||||
std::array<std::array<s32, 2>, 4> native_vertex_positions;
|
std::array<std::array<s32, 2>, 4> native_vertex_positions;
|
||||||
|
@ -1966,7 +1968,8 @@ void GPU_HW::LoadVertices()
|
||||||
bool valid_w = g_settings.gpu_pgxp_texture_correction;
|
bool valid_w = g_settings.gpu_pgxp_texture_correction;
|
||||||
for (u32 i = 0; i < num_vertices; i++)
|
for (u32 i = 0; i < num_vertices; i++)
|
||||||
{
|
{
|
||||||
const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
|
const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
|
||||||
|
const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color;
|
||||||
const u64 maddr_and_pos = m_fifo.Pop();
|
const u64 maddr_and_pos = m_fifo.Pop();
|
||||||
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
|
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
|
||||||
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
|
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
|
||||||
|
@ -2123,7 +2126,7 @@ void GPU_HW::LoadVertices()
|
||||||
|
|
||||||
case GPUPrimitive::Rectangle:
|
case GPUPrimitive::Rectangle:
|
||||||
{
|
{
|
||||||
const u32 color = rc.color_for_first_vertex;
|
const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex;
|
||||||
const GPUVertexPosition vp{FifoPop()};
|
const GPUVertexPosition vp{FifoPop()};
|
||||||
const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
|
const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
|
||||||
const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
|
const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
|
||||||
|
@ -2967,7 +2970,7 @@ void GPU_HW::DispatchRenderCommand()
|
||||||
{
|
{
|
||||||
const GPURenderCommand rc{m_render_command.bits};
|
const GPURenderCommand rc{m_render_command.bits};
|
||||||
|
|
||||||
GPUTextureMode texture_mode;
|
GPUTextureMode texture_mode = GPUTextureMode::Disabled;
|
||||||
if (rc.IsTexturingEnabled())
|
if (rc.IsTexturingEnabled())
|
||||||
{
|
{
|
||||||
// texture page changed - check that the new page doesn't intersect the drawing area
|
// texture page changed - check that the new page doesn't intersect the drawing area
|
||||||
|
@ -3027,16 +3030,9 @@ void GPU_HW::DispatchRenderCommand()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
texture_mode = m_draw_mode.mode_reg.texture_mode;
|
texture_mode = (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit2) ?
|
||||||
if (rc.raw_texture_enable)
|
GPUTextureMode::Direct16Bit :
|
||||||
{
|
m_draw_mode.mode_reg.texture_mode;
|
||||||
texture_mode =
|
|
||||||
static_cast<GPUTextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(GPUTextureMode::RawTextureBit));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
texture_mode = GPUTextureMode::Disabled;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// has any state changed which requires a new batch?
|
// has any state changed which requires a new batch?
|
||||||
|
|
|
@ -57,7 +57,8 @@ private:
|
||||||
{
|
{
|
||||||
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
|
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
|
||||||
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
|
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
|
||||||
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u)
|
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u),
|
||||||
|
NUM_TEXTURE_MODES = 4,
|
||||||
};
|
};
|
||||||
enum : u8
|
enum : u8
|
||||||
{
|
{
|
||||||
|
|
|
@ -638,8 +638,6 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
|
||||||
// TODO: don't write depth for shader blend
|
// TODO: don't write depth for shader blend
|
||||||
DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
|
DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
|
||||||
|
|
||||||
const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit;
|
|
||||||
const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit;
|
|
||||||
const bool textured = (texture_mode != GPUTextureMode::Disabled);
|
const bool textured = (texture_mode != GPUTextureMode::Disabled);
|
||||||
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
|
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
|
||||||
(transparency != GPUTransparencyMode::Disabled || check_mask));
|
(transparency != GPUTransparencyMode::Disabled || check_mask));
|
||||||
|
@ -658,10 +656,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
|
||||||
DefineMacro(ss, "CHECK_MASK_BIT", check_mask);
|
DefineMacro(ss, "CHECK_MASK_BIT", check_mask);
|
||||||
DefineMacro(ss, "TEXTURED", textured);
|
DefineMacro(ss, "TEXTURED", textured);
|
||||||
DefineMacro(ss, "PALETTE",
|
DefineMacro(ss, "PALETTE",
|
||||||
actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit);
|
texture_mode == GPUTextureMode::Palette4Bit || texture_mode == GPUTextureMode::Palette8Bit);
|
||||||
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPUTextureMode::Palette4Bit);
|
DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPUTextureMode::Palette4Bit);
|
||||||
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPUTextureMode::Palette8Bit);
|
DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPUTextureMode::Palette8Bit);
|
||||||
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
|
|
||||||
DefineMacro(ss, "DITHERING", dithering);
|
DefineMacro(ss, "DITHERING", dithering);
|
||||||
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
|
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
|
||||||
// Debanding requires true color to work correctly.
|
// Debanding requires true color to work correctly.
|
||||||
|
@ -873,23 +870,19 @@ float3 ApplyDebanding(float2 frag_coord)
|
||||||
// If not using true color, truncate the framebuffer colors to 5-bit.
|
// If not using true color, truncate the framebuffer colors to 5-bit.
|
||||||
#if !TRUE_COLOR
|
#if !TRUE_COLOR
|
||||||
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
|
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
|
||||||
#if !RAW_TEXTURE
|
icolor = (icolor * vertcol) >> 4;
|
||||||
icolor = (icolor * vertcol) >> 4;
|
#if DITHERING
|
||||||
#if DITHERING
|
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
||||||
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
#else
|
||||||
#else
|
icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
|
||||||
icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy));
|
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy));
|
||||||
#if !RAW_TEXTURE
|
icolor = (icolor * vertcol) >> 7;
|
||||||
icolor = (icolor * vertcol) >> 7;
|
#if DITHERING
|
||||||
#if DITHERING
|
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
||||||
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
#else
|
||||||
#else
|
icolor = min(icolor, uint3(255u, 255u, 255u));
|
||||||
icolor = min(icolor, uint3(255u, 255u, 255u));
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -53,16 +53,8 @@ enum class GPUTextureMode : u8
|
||||||
Palette4Bit = 0,
|
Palette4Bit = 0,
|
||||||
Palette8Bit = 1,
|
Palette8Bit = 1,
|
||||||
Direct16Bit = 2,
|
Direct16Bit = 2,
|
||||||
Reserved_Direct16Bit = 3,
|
Reserved_Direct16Bit2 = 3, // Register value only; treated as Direct16Bit when dispatching draws.
|
||||||
|
Disabled = 3 // Not a register value; shares its numeric value with Reserved_Direct16Bit2.
|
||||||
// Not register values.
|
|
||||||
RawTextureBit = 4,
|
|
||||||
RawPalette4Bit = RawTextureBit | Palette4Bit,
|
|
||||||
RawPalette8Bit = RawTextureBit | Palette8Bit,
|
|
||||||
RawDirect16Bit = RawTextureBit | Direct16Bit,
|
|
||||||
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
|
|
||||||
|
|
||||||
Disabled = 8 // Not a register value
|
|
||||||
};
|
};
|
||||||
|
|
||||||
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);
|
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);
|
||||||
|
@ -110,7 +102,7 @@ union GPURenderCommand
|
||||||
BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
|
BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
|
||||||
BitField<u32, bool, 27, 1> quad_polygon; // only for polygons
|
BitField<u32, bool, 27, 1> quad_polygon; // only for polygons
|
||||||
BitField<u32, bool, 27, 1> polyline; // only for lines
|
BitField<u32, bool, 27, 1> polyline; // only for lines
|
||||||
BitField<u32, bool, 28, 1> shading_enable; // 0 - flat, 1 = gouroud
|
BitField<u32, bool, 28, 1> shading_enable; // 0 = flat, 1 = Gouraud
|
||||||
BitField<u32, GPUPrimitive, 29, 21> primitive;
|
BitField<u32, GPUPrimitive, 29, 21> primitive;
|
||||||
|
|
||||||
/// Returns true if texturing should be enabled. Depends on the primitive type.
|
/// Returns true if texturing should be enabled. Depends on the primitive type.
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
static constexpr u32 SHADER_CACHE_VERSION = 15;
|
static constexpr u32 SHADER_CACHE_VERSION = 16;
|
||||||
|
|
Loading…
Reference in New Issue