GPU/HW: Clamp interpolated UVs to polygon limits

Fixes texture filtering and PGXP issues in some games.
This commit is contained in:
Connor McLaughlin 2020-08-10 22:37:07 +10:00
parent f14dc6de27
commit b95ce993e0
7 changed files with 130 additions and 58 deletions

View File

@ -215,6 +215,32 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
}
}
bool GPU_HW::AreUVLimitsNeeded()
{
// We only need UV limits if PGXP is enabled, or texture filtering is enabled.
return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filtering;
}
void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
{
u16 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v;
for (u32 i = 1; i < num_vertices; i++)
{
min_u = std::min<u16>(min_u, vertices[i].u);
max_u = std::max<u16>(max_u, vertices[i].u);
min_v = std::min<u16>(min_v, vertices[i].v);
max_v = std::max<u16>(max_v, vertices[i].v);
}
if (min_u != max_u)
max_u--;
if (min_v != max_v)
max_v--;
for (u32 i = 0; i < num_vertices; i++)
vertices[i].SetUVLimits(min_u, max_u, min_v, max_v);
}
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
const float dx = x1 - x0;
@ -223,10 +249,10 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
if (dx == 0.0f && dy == 0.0f)
{
// Degenerate, render a point.
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0);
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0);
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
}
else
{
@ -290,10 +316,10 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
const float ox1 = x1 + pad_x1;
const float oy1 = y1 + pad_y1;
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0);
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0);
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0);
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0);
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
}
AddVertex(output[0]);
@ -339,7 +365,7 @@ void GPU_HW::LoadVertices()
native_vertex_positions[i][0] = native_x;
native_vertex_positions[i][1] = native_y;
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
texcoord);
texcoord, 0xFFFF0000u);
if (pgxp)
{
@ -357,6 +383,9 @@ void GPU_HW::LoadVertices()
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (AreUVLimitsNeeded())
ComputePolygonUVLimits(vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
return;
@ -490,14 +519,15 @@ void GPU_HW::LoadVertices()
const float quad_start_x = static_cast<float>(pos_x + x_offset);
const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
x_offset += quad_width;
tex_left = 0;
@ -628,6 +658,8 @@ void GPU_HW::LoadVertices()
UnreachableCode();
break;
}
FlushRender();
}
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)

View File

@ -58,13 +58,16 @@ protected:
u32 texpage;
u16 u; // 16-bit texcoords are needed for 256 extent rectangles
u16 v;
u32 uv_limits;
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord)
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord,
u32 uv_limits_)
{
Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8));
Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8), uv_limits_);
}
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_)
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_,
u32 uv_limits_)
{
x = x_;
y = y_;
@ -74,6 +77,17 @@ protected:
texpage = texpage_;
u = u_;
v = v_;
uv_limits = uv_limits_;
}
ALWAYS_INLINE static u32 PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
{
return min_u | (min_v << 8) | (max_u << 16) | (max_v << 24);
}
ALWAYS_INLINE void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
{
uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
}
};
@ -236,6 +250,10 @@ protected:
/// Handles quads with flipped texture coordinate directions.
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
/// Computes polygon U/V boundaries.
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
static bool AreUVLimitsNeeded();
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
BatchVertex* m_batch_start_vertex_ptr = nullptr;

View File

@ -265,11 +265,12 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
bool GPU_HW_D3D11::CreateBatchInputLayout()
{
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = {
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 5> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,

View File

@ -95,6 +95,7 @@ private:
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
ComPtr<ID3D11RasterizerState> m_wireframe_rasterizer_state;
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;

View File

@ -297,12 +297,15 @@ bool GPU_HW_OpenGL::CreateVertexBuffer()
glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3);
glEnableVertexAttribArray(4);
glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(BatchVertex, color)));
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u)));
glVertexAttribIPointer(3, 1, GL_UNSIGNED_INT, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
glVertexAttribPointer(4, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(BatchVertex, uv_limits)));
glBindVertexArray(0);
glGenVertexArrays(1, &m_attributeless_vao_id);
@ -367,6 +370,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
{
prog.BindAttribute(2, "a_texcoord");
prog.BindAttribute(3, "a_texpage");
prog.BindAttribute(4, "a_uv_limits");
}
if (!IsGLES() || m_supports_dual_source_blend)

View File

@ -508,8 +508,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
const char* output_block_suffix = upscaled_lines ? "VS" : "";
if (textured)
{
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}}, false, output_block_suffix);
DeclareVertexEntryPoint(
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false, output_block_suffix);
}
else
{
@ -557,6 +558,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE;
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
#endif
}
)";
@ -658,13 +661,7 @@ uint2 FloatToIntegerCoords(float2 coords)
float4 SampleFromVRAM(uint4 texpage, float2 coords)
{
#if PALETTE
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
#if !TEXTURE_FILTERING
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
uint2 index_coord = icoord;
#if PALETTE_4_BIT
index_coord.x /= 4u;
@ -698,12 +695,43 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
return SAMPLE_TEXTURE(samp0, float2(direct_icoord) * RCP_VRAM_SIZE);
#endif
}
void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
out float4 texcol, out float ialpha)
{
// Compute the coordinates of the four texels we will be interpolating between.
// Clamp this to the triangle texture coordinates.
float2 texel_top_left = frac(coords) - float2(0.5, 0.5);
float2 texel_offset = sign(texel_top_left);
float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
float4(0.0, 0.0, 0.0, 0.0));
// Load four texels.
float4 s00 = SampleFromVRAM(texpage, clamp(fcoords.xy, uv_limits.xy, uv_limits.zw));
float4 s10 = SampleFromVRAM(texpage, clamp(fcoords.zy, uv_limits.xy, uv_limits.zw));
float4 s01 = SampleFromVRAM(texpage, clamp(fcoords.xw, uv_limits.xy, uv_limits.zw));
float4 s11 = SampleFromVRAM(texpage, clamp(fcoords.zw, uv_limits.xy, uv_limits.zw));
// Compute alpha from how many texels aren't pixel color 0000h.
float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));
// Bilinearly interpolate.
float2 weights = abs(texel_top_left);
texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
}
#endif
)";
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true);
DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true,
use_dual_source ? 2 : 1, true);
}
else
{
@ -725,48 +753,35 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
#endif
#if TEXTURED
#if TEXTURE_FILTERING
// Compute the coordinates of the four texels we will be interpolating between.
// TODO: Find some way to clamp this to the triangle texture coordinates?
float2 downscaled_coords = v_tex0;
float2 coords = v_tex0;
float4 uv_limits = v_uv_limits;
float4 texcol;
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
#if PALETTE
downscaled_coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
#else
uv_limits *= float4(RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
float2 texel_top_left = frac(downscaled_coords) - float2(0.5, 0.5);
float2 texel_offset = sign(texel_top_left);
float4 fcoords = max(downscaled_coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
float4(0.0, 0.0, 0.0, 0.0));
// Load four texels.
float4 s00 = SampleFromVRAM(v_texpage, fcoords.xy);
float4 s10 = SampleFromVRAM(v_texpage, fcoords.zy);
float4 s01 = SampleFromVRAM(v_texpage, fcoords.xw);
float4 s11 = SampleFromVRAM(v_texpage, fcoords.zw);
// Compute alpha from how many texels aren't pixel color 0000h.
float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));
// Bilinearly interpolate.
float2 weights = abs(texel_top_left);
float4 texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
#if TEXTURE_FILTERING
BilinearSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
if (ialpha < 0.5)
discard;
texcol.rgb /= float3(ialpha, ialpha, ialpha);
semitransparent = (texcol.a != 0.0);
#else
float4 texcol = SampleFromVRAM(v_texpage, v_tex0);
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw));
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
discard;
semitransparent = (texcol.a != 0.0);
ialpha = 1.0;
#endif
semitransparent = (texcol.a != 0.0);
// If not using true color, truncate the framebuffer colors to 5-bit.
#if !TRUE_COLOR
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;

View File

@ -646,6 +646,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
{
gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u));
gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage));
gpbuilder.AddVertexAttribute(4, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, uv_limits));
}
gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);