GPU/HW: Expand lines into triangles before rendering

Fewer batches, consistent upscaling behavior.
This commit is contained in:
Connor McLaughlin 2020-08-02 17:17:05 +10:00
parent 568cfa1865
commit 96ba9198ef
11 changed files with 210 additions and 368 deletions

View File

@ -462,19 +462,11 @@ bool Context::SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_feat
VkPhysicalDeviceFeatures available_features;
vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);
if (!available_features.fillModeNonSolid && !available_features.geometryShader)
{
Log_ErrorPrintf("fillModeNonSolid or geometryShader feature is required for line drawing.");
return false;
}
if (required_features)
std::memcpy(&m_device_features, required_features, sizeof(m_device_features));
// Enable the features we use.
m_device_features.dualSrcBlend = available_features.dualSrcBlend;
m_device_features.geometryShader = available_features.geometryShader;
m_device_features.fillModeNonSolid = available_features.fillModeNonSolid;
return true;
}

View File

@ -10,6 +10,15 @@
#include <sstream>
Log_SetChannel(GPU_HW);
/// Orders two values and returns them as a (smaller, larger) pair.
/// The arguments are swapped only when v1 compares strictly greater than v2;
/// otherwise they are returned in the order they were passed in.
template<typename T>
ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
{
  const bool needs_swap = (v1 > v2);
  return needs_swap ? std::tuple<T, T>(v2, v1) : std::tuple<T, T>(v1, v2);
}
GPU_HW::GPU_HW() : GPU()
{
m_vram_ptr = m_vram_shadow.data();
@ -189,25 +198,93 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
}
}
// The PlayStation GPU draws lines from start to end, inclusive. Or, more specifically, inclusive of the greatest delta
// in the x or y direction.
void GPU_HW::FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy)
// Expands the line from (x0, y0) to (x1, y1) into a four-corner quad and appends it to the
// current batch as two triangles (six AddVertex calls). col0 is applied to the two corners
// on the start side and col1 to the two corners on the end side; depth is copied to every
// corner. A zero-length line is emitted as a one-unit square coloured with col0.
// NOTE(review): this listing appears to come from a diff view without +/- markers. The
// statements below that reference start_x/start_y/end_x/end_y (and the integer dx/dy
// comparisons) look like remnants of the removed FixLineVertexCoordinates() body rather
// than part of DrawLine - confirm against the real file.
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
// deliberately not else if to catch the equal case
if (dx >= dy)
// Signed extent of the line along each axis.
const float dx = x1 - x0;
const float dy = y1 - y0;
// The four corners of the expanded quad, filled in by one of the branches below.
std::array<BatchVertex, 4> output;
if (dx == 0.0f && dy == 0.0f)
{
if (start_x > end_x)
start_x++;
else
end_x++;
// Degenerate, render a point.
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0);
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0);
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
}
if (dx <= dy)
else
{
if (start_y > end_y)
start_y++;
const float abs_dx = std::abs(dx);
const float abs_dy = std::abs(dy);
// fill_dx/fill_dy: offset from each endpoint to its second corner (the quad's thickness).
float fill_dx, fill_dy;
// dxdk/dydk: change in x / y for one unit step along the major axis.
float dxdk, dydk;
// Amount added to the start point (pad_*0) and end point (pad_*1) before building corners.
float pad_x0 = 0.0f;
float pad_x1 = 0.0f;
float pad_y0 = 0.0f;
float pad_y1 = 0.0f;
// Check for vertical or horizontal major lines.
// When expanding to a rect, do so in the appropriate direction.
// FIXME: This scheme seems to kinda work, but it seems very hard to find a method
// that looks perfect on every game.
// Vagrant Story speech bubbles are a very good test case here!
if (abs_dx > abs_dy)
{
// X-major: thicken by one unit in y, and step one unit in x per k.
fill_dx = 0.0f;
fill_dy = 1.0f;
dxdk = 1.0f;
dydk = dy / abs_dx;
// Whichever endpoint has the larger x is pushed one unit further in +x, following the slope.
if (dx > 0.0f)
{
// Right
pad_x1 = 1.0f;
pad_y1 = dydk;
}
else
{
// Left
pad_x0 = 1.0f;
pad_y0 = -dydk;
}
}
else
end_y++;
{
// Y-major (or equal extents): thicken by one unit in x, and step one unit in y per k.
fill_dx = 1.0f;
fill_dy = 0.0f;
dydk = 1.0f;
dxdk = dx / abs_dy;
// Whichever endpoint has the larger y is pushed one unit further in +y, following the slope.
if (dy > 0.0f)
{
// Down
pad_y1 = 1.0f;
pad_x1 = dxdk;
}
else
{
// Up
pad_y0 = 1.0f;
pad_x0 = -dxdk;
}
}
// Padded endpoints; each contributes itself plus a copy shifted by (fill_dx, fill_dy).
const float ox0 = x0 + pad_x0;
const float oy0 = y0 + pad_y0;
const float ox1 = x1 + pad_x1;
const float oy1 = y1 + pad_y1;
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0);
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0);
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0);
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0);
}
// Emit the quad as two triangles sharing the 1-2 diagonal: (0, 1, 2) and (3, 2, 1).
AddVertex(output[0]);
AddVertex(output[1]);
AddVertex(output[2]);
AddVertex(output[3]);
AddVertex(output[2]);
AddVertex(output[1]);
}
void GPU_HW::LoadVertices()
@ -251,7 +328,7 @@ void GPU_HW::LoadVertices()
{
valid_w &=
PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x,
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (!valid_w)
@ -433,47 +510,37 @@ void GPU_HW::LoadVertices()
{
DebugAssert(GetBatchVertexSpace() >= 2);
u32 color0, color1;
VertexPosition pos0, pos1;
u32 start_color, end_color;
VertexPosition start_pos, end_pos;
if (rc.shading_enable)
{
color0 = rc.color_for_first_vertex;
pos0.bits = FifoPop();
color1 = FifoPop() & UINT32_C(0x00FFFFFF);
pos1.bits = FifoPop();
start_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_color = FifoPop() & UINT32_C(0x00FFFFFF);
end_pos.bits = FifoPop();
}
else
{
color0 = color1 = rc.color_for_first_vertex;
pos0.bits = FifoPop();
pos1.bits = FifoPop();
start_color = end_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_pos.bits = FifoPop();
}
if (!IsDrawingAreaIsValid())
return;
s32 start_x = pos0.x + m_drawing_offset.x;
s32 start_y = pos0.y + m_drawing_offset.y;
s32 end_x = pos1.x + m_drawing_offset.x;
s32 end_y = pos1.y + m_drawing_offset.y;
const s32 min_x = std::min(start_x, end_x);
const s32 max_x = std::max(start_x, end_x);
const s32 min_y = std::min(start_y, end_y);
const s32 max_y = std::max(start_y, end_y);
const s32 dx = max_x - min_x;
const s32 dy = max_y - min_y;
if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
s32 start_x = start_pos.x + m_drawing_offset.x;
s32 start_y = start_pos.y + m_drawing_offset.y;
s32 end_x = end_pos.x + m_drawing_offset.x;
s32 end_y = end_pos.y + m_drawing_offset.y;
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
return;
}
FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, color0, 0,
static_cast<u16>(0));
AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color1, 0, static_cast<u16>(0));
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
@ -482,6 +549,10 @@ void GPU_HW::LoadVertices()
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
else
{
@ -492,57 +563,47 @@ void GPU_HW::LoadVertices()
if (!IsDrawingAreaIsValid())
return;
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
s32 last_x, last_y;
u32 last_color;
u32 buffer_pos = 0;
for (u32 i = 0; i < num_vertices; i++)
const VertexPosition start_vp{m_blit_buffer[buffer_pos++]};
s32 start_x = start_vp.x + m_drawing_offset.x;
s32 start_y = start_vp.y + m_drawing_offset.y;
u32 start_color = rc.color_for_first_vertex;
for (u32 i = 1; i < num_vertices; i++)
{
const u32 color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color;
const VertexPosition vp{m_blit_buffer[buffer_pos++]};
const s32 x = m_drawing_offset.x + vp.x;
const s32 y = m_drawing_offset.y + vp.y;
const s32 end_x = m_drawing_offset.x + vp.x;
const s32 end_y = m_drawing_offset.y + vp.y;
if (i > 0)
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
const s32 min_x = std::min(last_x, x);
const s32 max_x = std::max(last_x, x);
const s32 min_y = std::min(last_y, y);
const s32 max_y = std::max(last_y, y);
const s32 dx = max_x - min_x;
const s32 dy = max_y - min_y;
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, x, y);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_x, last_y, x, y);
}
else
{
s32 start_x = last_x, start_y = last_y;
s32 end_x = x, end_y = y;
FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, last_color, 0,
static_cast<u16>(0));
AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color, 0,
static_cast<u16>(0));
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
}
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
last_x = x;
last_y = y;
last_color = color;
start_x = end_x;
start_y = end_y;
start_color = end_color;
}
}
}
@ -632,14 +693,6 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst
return uniforms;
}
// Selects the batch primitive for a render command: line commands map to
// BatchPrimitive::Lines, every other primitive maps to BatchPrimitive::Triangles.
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{
  const bool is_line_command = (rc.primitive == Primitive::Line);
  return is_line_command ? BatchPrimitive::Lines : BatchPrimitive::Triangles;
}
void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect)
{
m_vram_dirty_rect.Include(rect);
@ -680,7 +733,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
break;
case Primitive::Line:
default:
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 2u) : 2u;
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u;
break;
}
@ -778,10 +831,9 @@ void GPU_HW::DispatchRenderCommand()
// has any state changed which requires a new batch?
const TransparencyMode transparency_mode =
rc.transparency_enable ? m_draw_mode.GetTransparencyMode() : TransparencyMode::Disabled;
const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
if (m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode ||
m_batch.primitive != rc_primitive || dithering_enable != m_batch.dithering)
dithering_enable != m_batch.dithering)
{
FlushRender();
}
@ -815,7 +867,6 @@ void GPU_HW::DispatchRenderCommand()
}
// update state
m_batch.primitive = rc_primitive;
m_batch.texture_mode = texture_mode;
m_batch.transparency_mode = transparency_mode;
m_batch.dithering = dithering_enable;

View File

@ -11,12 +11,6 @@
class GPU_HW : public GPU
{
public:
/// Kind of primitive a batch of vertices is drawn as.
/// The underlying u8 value is used directly as an array index elsewhere in this listing
/// (e.g. static_cast<u8>(m_batch.primitive) into two-entry topology tables), so changing
/// the values or their order would change which entry is selected.
enum class BatchPrimitive : u8
{
// Index 0 of the lookup tables (line-list entries).
Lines = 0,
// Index 1 of the lookup tables (triangle-list entries).
Triangles = 1
};
enum class BatchRenderMode : u8
{
TransparencyDisabled,
@ -84,7 +78,6 @@ protected:
struct BatchConfig
{
BatchPrimitive primitive;
TextureMode texture_mode;
TransparencyMode transparency_mode;
bool dithering;
@ -234,9 +227,11 @@ protected:
VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const;
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
/// Expands a line into two triangles.
void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth);
/// Handles quads with flipped texture coordinate directions.
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
static void FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
@ -274,8 +269,6 @@ private:
MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex)
};
static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
void LoadVertices();
ALWAYS_INLINE void AddVertex(const BatchVertex& v)

View File

@ -109,6 +109,7 @@ void GPU_HW_D3D11::RestoreGraphicsAPIState()
const UINT offset = 0;
m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset);
m_context->IASetInputLayout(m_batch_input_layout.Get());
m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
m_context->RSSetState(m_cull_none_rasterizer_state.Get());
@ -405,15 +406,6 @@ bool GPU_HW_D3D11::CompileShaders()
}
}
m_batch_line_expand_geometry_shader.Reset();
if (m_resolution_scale > 1)
{
m_batch_line_expand_geometry_shader =
m_shader_cache.GetGeometryShader(m_device.Get(), shadergen.GenerateBatchLineExpandGeometryShader());
if (!m_batch_line_expand_geometry_shader)
return false;
}
m_copy_pixel_shader = m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateCopyFragmentShader());
if (!m_copy_pixel_shader)
return false;
@ -520,7 +512,6 @@ void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* unif
m_batch_ubo_dirty = true;
}
m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0);
m_context->GSSetShader(nullptr, nullptr, 0);
m_context->PSSetShader(shader, nullptr, 0);
@ -533,17 +524,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
{
const bool textured = (m_batch.texture_mode != TextureMode::Disabled);
static constexpr std::array<D3D11_PRIMITIVE_TOPOLOGY, 2> d3d_primitives = {
{D3D11_PRIMITIVE_TOPOLOGY_LINELIST, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST}};
m_context->IASetPrimitiveTopology(d3d_primitives[static_cast<u8>(m_batch.primitive)]);
m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0);
m_context->GSSetShader((m_batch.primitive < GPU_HW::BatchPrimitive::Triangles && m_resolution_scale > 1) ?
m_batch_line_expand_geometry_shader.Get() :
nullptr,
nullptr, 0);
m_context->PSSetShader(m_batch_pixel_shaders[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]
.Get(),

View File

@ -110,7 +110,6 @@ private:
std::array<ComPtr<ID3D11VertexShader>, 2> m_batch_vertex_shaders; // [textured]
std::array<std::array<std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2>, 9>, 4>
m_batch_pixel_shaders; // [render_mode][texture_mode][dithering][interlacing]
ComPtr<ID3D11GeometryShader> m_batch_line_expand_geometry_shader;
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;

View File

@ -397,20 +397,6 @@ bool GPU_HW_OpenGL::CompilePrograms()
}
m_render_programs[render_mode][texture_mode][dithering][interlacing] = std::move(*prog);
if (!textured && m_supports_geometry_shaders)
{
const std::string line_expand_vs = shadergen.GenerateBatchVertexShader(textured, true);
const std::string line_expand_gs = shadergen.GenerateBatchLineExpandGeometryShader();
prog = m_shader_cache.GetProgram(line_expand_vs, line_expand_gs, fs, link_callback);
if (!prog)
return false;
if (!use_binding_layout)
prog->BindUniformBlock("UBOBlock", 1);
m_line_render_programs[render_mode][dithering][interlacing] = std::move(*prog);
}
}
}
}
@ -524,12 +510,8 @@ bool GPU_HW_OpenGL::CompilePrograms()
void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{
const GL::Program& prog =
((m_batch.primitive < BatchPrimitive::Triangles && m_supports_geometry_shaders && m_resolution_scale > 1) ?
m_line_render_programs[static_cast<u8>(render_mode)][BoolToUInt8(m_batch.dithering)]
[BoolToUInt8(m_batch.interlacing)] :
m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]);
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
prog.Bind();
if (m_batch.texture_mode != TextureMode::Disabled)
@ -550,8 +532,7 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
static constexpr std::array<GLenum, 2> gl_primitives = {{GL_LINES, GL_TRIANGLES}};
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], m_batch_base_vertex, num_vertices);
glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices);
}
void GPU_HW_OpenGL::SetScissorFromDrawingArea()

View File

@ -81,9 +81,7 @@ private:
GLuint m_texture_buffer_r16ui_texture = 0;
std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
std::array<std::array<std::array<GL::Program, 2>, 2>, 4>
m_line_render_programs; // [render_mode][dithering][interlacing]
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_interlaced_fill_program;
GL::Program m_vram_read_program;

View File

@ -885,111 +885,6 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
return ss.str();
}
// Builds and returns the source text of a geometry shader that expands a line (two input
// vertices) into a four-vertex triangle strip. Two of the emitted corners are the input
// positions; the other two are the same positions shifted by an offset perpendicular to the
// line direction (cross product with the Z axis) and scaled by WIDTH. Each corner carries the
// colour of the input vertex it was derived from.
// GLSL is generated when m_glsl is set, HLSL otherwise.
std::string GPU_HW_ShaderGen::GenerateBatchLineExpandGeometryShader()
{
std::stringstream ss;
// Shared preamble written before the shader body.
WriteHeader(ss);
WriteCommonFunctions(ss);
// WIDTH is the per-axis offset magnitude, derived from RESOLUTION_SCALE and VRAM_SIZE.
ss << R"(
CONSTANT float2 WIDTH = (float(RESOLUTION_SCALE * 2u) / float2(VRAM_SIZE));
)";
// GS is a pain, too different between HLSL and GLSL...
if (m_glsl)
{
// The input and output interface blocks get an explicit location qualifier only when
// IsVulkan() is true.
if (IsVulkan())
ss << "layout(location = 0) ";
ss << R"(in VertexDataVS {
float4 v_col0;
} in_data[];)";
if (IsVulkan())
ss << "layout(location = 0) ";
ss << R"(out VertexData {
float4 v_col0;
} out_data;
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
void main() {
float2 dir = normalize(gl_in[1].gl_Position.xy - gl_in[0].gl_Position.xy);
float2 normal = cross(float3(dir, 0.0), float3(0.0, 0.0, 1.0)).xy * WIDTH;
float4 offset = float4(normal, 0.0, 0.0);
// top-left
out_data.v_col0 = in_data[0].v_col0;
gl_Position = gl_in[0].gl_Position;
EmitVertex();
// top-right
out_data.v_col0 = in_data[0].v_col0;
gl_Position = gl_in[0].gl_Position + offset;
EmitVertex();
// bottom-left
out_data.v_col0 = in_data[1].v_col0;
gl_Position = gl_in[1].gl_Position;
EmitVertex();
// bottom-right
out_data.v_col0 = in_data[1].v_col0;
gl_Position = gl_in[1].gl_Position + offset;
EmitVertex();
EndPrimitive();
}
)";
}
else
{
// HLSL variant: same expansion, expressed with a line input and a TriangleStream output.
ss << R"(
struct Vertex
{
float4 col0 : COLOR0;
float4 pos : SV_Position;
};
[maxvertexcount(4)]
void main(line Vertex input[2], inout TriangleStream<Vertex> output)
{
Vertex v;
float2 dir = normalize(input[1].pos.xy - input[0].pos.xy);
float2 normal = cross(float3(dir, 0.0), float3(0.0, 0.0, 1.0)).xy * WIDTH;
float4 offset = float4(normal, 0.0, 0.0);
// top-left
v.col0 = input[0].col0;
v.pos = input[0].pos;
output.Append(v);
// top-right
v.col0 = input[0].col0;
v.pos = input[0].pos + offset;
output.Append(v);
// bottom-left
v.col0 = input[1].col0;
v.pos = input[1].pos;
output.Append(v);
// bottom-right
v.col0 = input[1].col0;
v.pos = input[1].pos + offset;
output.Append(v);
output.RestartStrip();
}
)";
}
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
{
std::stringstream ss;

View File

@ -16,7 +16,6 @@ public:
std::string GenerateBatchVertexShader(bool textured, bool upscaled_lines);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode,
bool dithering, bool interlacing);
std::string GenerateBatchLineExpandGeometryShader();
std::string GenerateScreenQuadVertexShader();
std::string GenerateFillFragmentShader();
std::string GenerateInterlacedFillFragmentShader();

View File

@ -565,10 +565,6 @@ bool GPU_HW_Vulkan::CreateTextureBuffer()
bool GPU_HW_Vulkan::CompilePipelines()
{
static constexpr std::array<VkPrimitiveTopology, 2> primitive_mapping = {
{VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST}};
static constexpr std::array<VkPolygonMode, 2> polygon_mode_mapping = {{VK_POLYGON_MODE_LINE, VK_POLYGON_MODE_FILL}};
g_host_interface->DisplayLoadingScreen("Compiling Shaders...");
VkDevice device = g_vulkan_context->GetDevice();
@ -579,15 +575,12 @@ bool GPU_HW_Vulkan::CompilePipelines()
// vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
DimensionalArray<VkShaderModule, 2, 2> batch_vertex_shaders{};
DimensionalArray<VkShaderModule, 2> batch_vertex_shaders{};
DimensionalArray<VkShaderModule, 2, 2, 9, 4> batch_fragment_shaders{};
VkShaderModule batch_line_geometry_shader = VK_NULL_HANDLE;
Common::ScopeGuard batch_shader_guard(
[&batch_vertex_shaders, &batch_fragment_shaders, &batch_line_geometry_shader]() {
batch_vertex_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
batch_fragment_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
Vulkan::Util::SafeDestroyShaderModule(batch_line_geometry_shader);
});
Common::ScopeGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
batch_fragment_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule);
});
for (u8 textured = 0; textured < 2; textured++)
{
@ -596,7 +589,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
if (shader == VK_NULL_HANDLE)
return false;
batch_vertex_shaders[textured][0] = shader;
batch_vertex_shaders[textured] = shader;
}
for (u8 render_mode = 0; render_mode < 4; render_mode++)
@ -621,107 +614,66 @@ bool GPU_HW_Vulkan::CompilePipelines()
}
}
if (m_resolution_scale > 1 || !g_vulkan_context->GetDeviceFeatures().fillModeNonSolid)
{
if (g_vulkan_context->GetDeviceFeatures().geometryShader)
{
const std::string gs = shadergen.GenerateBatchLineExpandGeometryShader();
batch_line_geometry_shader = g_vulkan_shader_cache->GetGeometryShader(gs);
if (batch_line_geometry_shader == VK_NULL_HANDLE)
return false;
for (u8 textured = 0; textured < 2; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured), true);
VkShaderModule shader = g_vulkan_shader_cache->GetVertexShader(vs);
if (shader == VK_NULL_HANDLE)
return false;
batch_vertex_shaders[textured][1] = shader;
}
}
else
{
Log_WarningPrintf("Upscaling requested but geometry shaders are unsupported, line rendering will be incorrect.");
}
}
Vulkan::GraphicsPipelineBuilder gpbuilder;
// [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
for (u8 primitive = 0; primitive < 2; primitive++)
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
for (u8 depth_test = 0; depth_test < 2; depth_test++)
for (u8 render_mode = 0; render_mode < 4; render_mode++)
{
for (u8 render_mode = 0; render_mode < 4; render_mode++)
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
for (u8 dithering = 0; dithering < 2; dithering++)
{
for (u8 dithering = 0; dithering < 2; dithering++)
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
gpbuilder.SetRenderPass(m_vram_render_pass, 0);
gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX);
gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(BatchVertex, x));
gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color));
if (textured)
{
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
gpbuilder.SetRenderPass(m_vram_render_pass, 0);
gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX);
gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(BatchVertex, x));
gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color));
if (textured)
{
gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u));
gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage));
}
gpbuilder.SetPrimitiveTopology(primitive_mapping[primitive]);
gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]);
if (static_cast<BatchPrimitive>(primitive) == BatchPrimitive::Lines &&
batch_line_geometry_shader != VK_NULL_HANDLE)
{
gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)][1]);
gpbuilder.SetGeometryShader(batch_line_geometry_shader);
gpbuilder.SetRasterizationState(polygon_mode_mapping[static_cast<u8>(BatchPrimitive::Triangles)],
VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
}
else
{
gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)][0]);
gpbuilder.SetRasterizationState(polygon_mode_mapping[primitive], VK_CULL_MODE_NONE,
VK_FRONT_FACE_CLOCKWISE);
}
gpbuilder.SetDepthState(true, true,
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetNoBlendingState();
if ((static_cast<TransparencyMode>(transparency_mode) != TransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
m_texture_filtering)
{
gpbuilder.SetBlendAttachment(
0, true, VK_BLEND_FACTOR_ONE,
m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA,
(static_cast<TransparencyMode>(transparency_mode) == TransparencyMode::BackgroundMinusForeground) ?
VK_BLEND_OP_REVERSE_SUBTRACT :
VK_BLEND_OP_ADD,
VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD);
}
gpbuilder.SetDynamicViewportAndScissorState();
VkPipeline pipeline = gpbuilder.Create(device, pipeline_cache);
if (pipeline == VK_NULL_HANDLE)
return false;
m_batch_pipelines[primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering]
[interlacing] = pipeline;
gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u));
gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage));
}
gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)]);
gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]);
gpbuilder.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
gpbuilder.SetDepthState(true, true,
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetNoBlendingState();
if ((static_cast<TransparencyMode>(transparency_mode) != TransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
m_texture_filtering)
{
gpbuilder.SetBlendAttachment(
0, true, VK_BLEND_FACTOR_ONE,
m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA,
(static_cast<TransparencyMode>(transparency_mode) == TransparencyMode::BackgroundMinusForeground) ?
VK_BLEND_OP_REVERSE_SUBTRACT :
VK_BLEND_OP_ADD,
VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD);
}
gpbuilder.SetDynamicViewportAndScissorState();
VkPipeline pipeline = gpbuilder.Create(device, pipeline_cache);
if (pipeline == VK_NULL_HANDLE)
return false;
m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] =
pipeline;
}
}
}
@ -920,8 +872,8 @@ void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
// [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
VkPipeline pipeline =
m_batch_pipelines[static_cast<u8>(m_batch.primitive)][BoolToUInt8(m_batch.check_mask_before_draw)][static_cast<u8>(
render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(m_batch.transparency_mode)]
m_batch_pipelines[BoolToUInt8(m_batch.check_mask_before_draw)][static_cast<u8>(render_mode)]
[static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(m_batch.transparency_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);

View File

@ -105,8 +105,8 @@ private:
u32 m_current_uniform_buffer_offset = 0;
VkBufferView m_texture_stream_buffer_view = VK_NULL_HANDLE;
// [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 2, 2> m_batch_pipelines{};
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 2> m_batch_pipelines{};
// [interlaced]
std::array<VkPipeline, 2> m_vram_fill_pipelines{};