GPU/ShaderGen: Move some calculations from fragment to vertex shader

This commit is contained in:
Connor McLaughlin 2021-06-13 20:37:13 +10:00
parent e1d9f93172
commit aafd20fb3c
6 changed files with 71 additions and 54 deletions

View File

@ -537,7 +537,7 @@ bool GPU_HW_D3D11::CompileShaders()
// we need a vertex shader...
ComPtr<ID3DBlob> vs_bytecode =
shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true));
shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true, true));
if (!vs_bytecode)
return false;
@ -562,9 +562,9 @@ bool GPU_HW_D3D11::CompileShaders()
UPDATE_PROGRESS();
for (u8 textured = 0; textured < 2; textured++)
for (u8 textured = 0; textured < 3; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
const std::string vs = shadergen.GenerateBatchVertexShader(textured != 0, textured > 1);
m_batch_vertex_shaders[textured] = shader_cache.GetVertexShader(m_device.Get(), vs);
if (!m_batch_vertex_shaders[textured])
return false;
@ -807,9 +807,11 @@ bool GPU_HW_D3D11::BlitVRAMReplacementTexture(const TextureReplacementTexture* t
void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{
const bool textured = (m_batch.texture_mode != GPUTextureMode::Disabled);
const u8 textured =
BoolToUInt8(m_batch.texture_mode != GPUTextureMode::Disabled) +
BoolToUInt8((m_batch.texture_mode & ~GPUTextureMode::RawTextureBit) <= GPUTextureMode::Palette8Bit);
m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0);
m_context->VSSetShader(m_batch_vertex_shaders[textured].Get(), nullptr, 0);
m_context->PSSetShader(m_batch_pixel_shaders[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]

View File

@ -116,7 +116,7 @@ private:
std::array<ComPtr<ID3D11BlendState>, 5> m_batch_blend_states; // [transparency_mode]
ComPtr<ID3D11InputLayout> m_batch_input_layout;
std::array<ComPtr<ID3D11VertexShader>, 2> m_batch_vertex_shaders; // [textured]
std::array<ComPtr<ID3D11VertexShader>, 3> m_batch_vertex_shaders; // [textured/palette]
std::array<std::array<std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2>, 9>, 4>
m_batch_pixel_shaders; // [render_mode][texture_mode][dithering][interlacing]

View File

@ -544,7 +544,8 @@ bool GPU_HW_OpenGL::CompilePrograms()
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured);
const bool paletted = textured && (static_cast<GPUTextureMode>(texture_mode & 3u) <= GPUTextureMode::Palette8Bit);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured, paletted);
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));

View File

@ -23,6 +23,8 @@ void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss)
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n";
ss << "CONSTANT uint2 NATIVE_VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ");\n";
ss << "CONSTANT float2 RCP_NATIVE_VRAM_SIZE = float2(1.0, 1.0) / float2(NATIVE_VRAM_SIZE);\n";
ss << "CONSTANT uint MULTISAMPLES = " << m_multisamples << "u;\n";
ss << "CONSTANT bool PER_SAMPLE_SHADING = " << (m_per_sample_shading ? "true" : "false") << ";\n";
ss << R"(
@ -45,6 +47,15 @@ uint fixYCoord(uint y)
#endif
}
uint fixNativeYCoord(uint y)
{
#if API_OPENGL || API_OPENGL_ES
return NATIVE_VRAM_SIZE.y - y - 1u;
#else
return y;
#endif
}
uint RGBA8ToRGBA5551(float4 v)
{
uint r = uint(roundEven(v.r * 31.0));
@ -75,11 +86,12 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
false);
}
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette)
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", palette);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
@ -159,21 +171,41 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
v_pos = float4(pos_x * pos_w, pos_y * pos_w, pos_z * pos_w, pos_w);
v_col0 = a_col0;
v_col0 = a_col0 * float4(255.0, 255.0, 255.0, 255.0);
#if TEXTURED
v_tex0 = float2(float((a_texcoord & 0xFFFFu) * RESOLUTION_SCALE),
float((a_texcoord >> 16) * RESOLUTION_SCALE));
#if PALETTE
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
v_tex0 = float2(float(a_texcoord & 0xFFFFu), float((a_texcoord >> 16)));
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15u) * 64u * RESOLUTION_SCALE;
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE;
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15u) * 64u;
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u;
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u;
v_texpage.w = ((a_texpage >> 22) & 511u);
#if UV_LIMITS
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
#endif
#endif
#if UV_LIMITS
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
#endif
#else
v_tex0 = float2(float((a_texcoord & 0xFFFFu) * RESOLUTION_SCALE),
float((a_texcoord >> 16) * RESOLUTION_SCALE));
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15u) * 64u * RESOLUTION_SCALE;
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE;
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
#if UV_LIMITS
// Extend the UV range to all "upscaled" pixels. This means 1-pixel-high polygon-based
// framebuffer effects won't be downsampled. (e.g. Mega Man Legends 2 haze effect)
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
v_uv_limits.xy *= float(RESOLUTION_SCALE);
v_uv_limits.zw = (v_uv_limits.zw * float(RESOLUTION_SCALE + 1u)) - float(RESOLUTION_SCALE - 1u);
#endif
#endif // PALETTE
#endif // TEXTURED
}
)";
@ -767,11 +799,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
index_coord.x /= 2u;
#endif
// fixup coords
uint2 vicoord = uint2(texpage.x + index_coord.x * RESOLUTION_SCALE, fixYCoord(texpage.y + index_coord.y * RESOLUTION_SCALE));
// load colour/palette
float4 texel = SAMPLE_TEXTURE(samp0, float2(vicoord) * RCP_VRAM_SIZE);
// load palette index
uint2 vicoord = uint2(texpage.x + index_coord.x, fixNativeYCoord(texpage.y + index_coord.y));
float4 texel = SAMPLE_TEXTURE(samp0, float2(vicoord) * RCP_NATIVE_VRAM_SIZE);
uint vram_value = RGBA8ToRGBA5551(texel);
// apply palette
@ -784,8 +814,8 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
#endif
// sample palette
uint2 palette_icoord = uint2(texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(texpage.w));
return SAMPLE_TEXTURE(samp0, float2(palette_icoord) * RCP_VRAM_SIZE);
uint2 palette_icoord = uint2(texpage.z + palette_index, fixNativeYCoord(texpage.w));
return SAMPLE_TEXTURE(samp0, float2(palette_icoord) * RCP_NATIVE_VRAM_SIZE);
#else
// Direct texturing. Render-to-texture effects. Use upscaled coordinates.
uint2 icoord = ApplyUpscaledTextureWindow(FloatToIntegerCoords(coords));
@ -822,7 +852,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
ss << R"(
{
uint3 vertcol = uint3(v_col0.rgb * float3(255.0, 255.0, 255.0));
uint3 vertcol = uint3(v_col0.rgb);
bool semitransparent;
uint3 icolor;
@ -835,34 +865,16 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
#endif
#if TEXTURED
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
float2 coords = v_tex0;
#if PALETTE
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
#if UV_LIMITS
float4 uv_limits = v_uv_limits;
#if !PALETTE
// Extend the UV range to all "upscaled" pixels. This means 1-pixel-high polygon-based
// framebuffer effects won't be downsampled. (e.g. Mega Man Legends 2 haze effect)
uv_limits.xy *= float(RESOLUTION_SCALE);
uv_limits.zw = (uv_limits.zw * float(RESOLUTION_SCALE + 1u)) - float(RESOLUTION_SCALE - 1u);
#endif
#endif
float4 texcol;
#if TEXTURE_FILTERING
FilteredSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha);
if (ialpha < 0.5)
discard;
#else
#if UV_LIMITS
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw));
texcol = SampleFromVRAM(v_texpage, clamp(v_tex0, v_uv_limits.xy, v_uv_limits.zw));
#else
texcol = SampleFromVRAM(v_texpage, coords);
texcol = SampleFromVRAM(v_texpage, v_tex0);
#endif
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
discard;

View File

@ -10,7 +10,7 @@ public:
bool pgxp_depth, bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
std::string GenerateBatchVertexShader(bool textured, bool palette);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
bool dithering, bool interlacing);
std::string GenerateInterlacedFillFragmentShader();

View File

@ -859,16 +859,16 @@ bool GPU_HW_Vulkan::CompilePipelines()
// vertex shaders - [textured/palette] (0 = untextured, 1 = textured, 2 = textured + palette)
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
DimensionalArray<VkShaderModule, 2> batch_vertex_shaders{};
DimensionalArray<VkShaderModule, 3> batch_vertex_shaders{};
DimensionalArray<VkShaderModule, 2, 2, 9, 4> batch_fragment_shaders{};
Common::ScopeGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
batch_fragment_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
});
for (u8 textured = 0; textured < 2; textured++)
for (u8 textured = 0; textured < 3; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
const std::string vs = shadergen.GenerateBatchVertexShader(textured != 0, textured > 1);
VkShaderModule shader = g_vulkan_shader_cache->GetVertexShader(vs);
if (shader == VK_NULL_HANDLE)
return false;
@ -918,6 +918,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
static constexpr std::array<VkCompareOp, 3> depth_test_values = {
VK_COMPARE_OP_ALWAYS, VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS_OR_EQUAL};
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
const bool paletted =
textured && (static_cast<GPUTextureMode>(texture_mode & 3u) <= GPUTextureMode::Palette8Bit);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
gpbuilder.SetRenderPass(m_vram_render_pass, 0);
@ -934,7 +936,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
}
gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)]);
gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured) + BoolToUInt8(paletted)]);
gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]);
gpbuilder.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);