GPU/HW: Clamp interpolated UVs to polygon limits
Fixes texture filtering and PGXP issues in some games.
This commit is contained in:
parent
f14dc6de27
commit
b95ce993e0
|
@ -215,6 +215,32 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GPU_HW::AreUVLimitsNeeded()
|
||||||
|
{
|
||||||
|
// We only need UV limits if PGXP is enabled, or texture filtering is enabled.
|
||||||
|
return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filtering;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the U/V bounding box of a polygon and stores it, packed, in every vertex.
///
/// @param vertices      Polygon vertices; element 0 is always read, and uv_limits is written
///                      on each of the first num_vertices elements.
/// @param num_vertices  Number of vertices in the polygon.
void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
{
  // Seed the bounds with the first vertex, then widen them with the remaining ones.
  u16 lo_u = vertices[0].u;
  u16 hi_u = lo_u;
  u16 lo_v = vertices[0].v;
  u16 hi_v = lo_v;
  for (u32 idx = 1; idx < num_vertices; ++idx)
  {
    const BatchVertex& vtx = vertices[idx];
    if (vtx.u < lo_u)
      lo_u = vtx.u;
    if (vtx.u > hi_u)
      hi_u = vtx.u;
    if (vtx.v < lo_v)
      lo_v = vtx.v;
    if (vtx.v > hi_v)
      hi_v = vtx.v;
  }

  // Pull the upper bound in by one on every axis that is not degenerate (min == max).
  // NOTE(review): presumably the largest coordinate is treated as exclusive - confirm.
  if (hi_u > lo_u)
    --hi_u;
  if (hi_v > lo_v)
    --hi_v;

  // Every vertex of the polygon carries the same packed limits.
  const u32 packed_limits = BatchVertex::PackUVLimits(lo_u, hi_u, lo_v, hi_v);
  for (u32 idx = 0; idx < num_vertices; ++idx)
    vertices[idx].uv_limits = packed_limits;
}
|
||||||
|
|
||||||
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
|
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
|
||||||
{
|
{
|
||||||
const float dx = x1 - x0;
|
const float dx = x1 - x0;
|
||||||
|
@ -223,10 +249,10 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
|
||||||
if (dx == 0.0f && dy == 0.0f)
|
if (dx == 0.0f && dy == 0.0f)
|
||||||
{
|
{
|
||||||
// Degenerate, render a point.
|
// Degenerate, render a point.
|
||||||
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0);
|
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
|
||||||
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0);
|
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
|
||||||
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
|
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
|
||||||
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0);
|
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -290,10 +316,10 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
|
||||||
const float ox1 = x1 + pad_x1;
|
const float ox1 = x1 + pad_x1;
|
||||||
const float oy1 = y1 + pad_y1;
|
const float oy1 = y1 + pad_y1;
|
||||||
|
|
||||||
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0);
|
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
|
||||||
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0);
|
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
|
||||||
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0);
|
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
|
||||||
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0);
|
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
AddVertex(output[0]);
|
AddVertex(output[0]);
|
||||||
|
@ -339,7 +365,7 @@ void GPU_HW::LoadVertices()
|
||||||
native_vertex_positions[i][0] = native_x;
|
native_vertex_positions[i][0] = native_x;
|
||||||
native_vertex_positions[i][1] = native_y;
|
native_vertex_positions[i][1] = native_y;
|
||||||
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
|
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
|
||||||
texcoord);
|
texcoord, 0xFFFF0000u);
|
||||||
|
|
||||||
if (pgxp)
|
if (pgxp)
|
||||||
{
|
{
|
||||||
|
@ -357,6 +383,9 @@ void GPU_HW::LoadVertices()
|
||||||
if (rc.quad_polygon && m_resolution_scale > 1)
|
if (rc.quad_polygon && m_resolution_scale > 1)
|
||||||
HandleFlippedQuadTextureCoordinates(vertices.data());
|
HandleFlippedQuadTextureCoordinates(vertices.data());
|
||||||
|
|
||||||
|
if (AreUVLimitsNeeded())
|
||||||
|
ComputePolygonUVLimits(vertices.data(), num_vertices);
|
||||||
|
|
||||||
if (!IsDrawingAreaIsValid())
|
if (!IsDrawingAreaIsValid())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -490,14 +519,15 @@ void GPU_HW::LoadVertices()
|
||||||
const float quad_start_x = static_cast<float>(pos_x + x_offset);
|
const float quad_start_x = static_cast<float>(pos_x + x_offset);
|
||||||
const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
|
const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
|
||||||
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
|
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
|
||||||
|
const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);
|
||||||
|
|
||||||
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top);
|
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
|
||||||
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
|
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
|
||||||
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
|
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
|
||||||
|
|
||||||
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
|
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
|
||||||
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
|
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
|
||||||
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom);
|
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
|
||||||
|
|
||||||
x_offset += quad_width;
|
x_offset += quad_width;
|
||||||
tex_left = 0;
|
tex_left = 0;
|
||||||
|
@ -628,6 +658,8 @@ void GPU_HW::LoadVertices()
|
||||||
UnreachableCode();
|
UnreachableCode();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FlushRender();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
||||||
|
|
|
@ -58,13 +58,16 @@ protected:
|
||||||
u32 texpage;
|
u32 texpage;
|
||||||
u16 u; // 16-bit texcoords are needed for 256 extent rectangles
|
u16 u; // 16-bit texcoords are needed for 256 extent rectangles
|
||||||
u16 v;
|
u16 v;
|
||||||
|
u32 uv_limits;
|
||||||
|
|
||||||
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord)
|
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord,
|
||||||
|
u32 uv_limits_)
|
||||||
{
|
{
|
||||||
Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8));
|
Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8), uv_limits_);
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_)
|
ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_,
|
||||||
|
u32 uv_limits_)
|
||||||
{
|
{
|
||||||
x = x_;
|
x = x_;
|
||||||
y = y_;
|
y = y_;
|
||||||
|
@ -74,6 +77,17 @@ protected:
|
||||||
texpage = texpage_;
|
texpage = texpage_;
|
||||||
u = u_;
|
u = u_;
|
||||||
v = v_;
|
v = v_;
|
||||||
|
uv_limits = uv_limits_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Packs four UV bounds into a single 32-bit word.
/// Layout, from the least significant byte upwards: min_u, min_v, max_u, max_v.
/// Inputs are not masked, so a value above 255 overlaps the neighbouring field(s).
ALWAYS_INLINE static u32 PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
{
  // Build the lower (minimum) and upper (maximum) 16-bit halves separately, then join them.
  const u32 packed_min = min_u | (min_v << 8);
  const u32 packed_max = max_u | (max_v << 8);
  return packed_min | (packed_max << 16);
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
|
||||||
|
{
|
||||||
|
uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -236,6 +250,10 @@ protected:
|
||||||
/// Handles quads with flipped texture coordinate directions.
|
/// Handles quads with flipped texture coordinate directions.
|
||||||
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
|
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
|
||||||
|
|
||||||
|
/// Computes polygon U/V boundaries.
|
||||||
|
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
|
||||||
|
static bool AreUVLimitsNeeded();
|
||||||
|
|
||||||
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
|
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
|
||||||
|
|
||||||
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
||||||
|
|
|
@ -265,11 +265,12 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
|
||||||
|
|
||||||
bool GPU_HW_D3D11::CreateBatchInputLayout()
|
bool GPU_HW_D3D11::CreateBatchInputLayout()
|
||||||
{
|
{
|
||||||
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = {
|
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 5> attributes = {
|
||||||
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||||
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||||
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||||
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
|
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||||
|
{"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
|
||||||
|
|
||||||
// we need a vertex shader...
|
// we need a vertex shader...
|
||||||
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
|
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
|
||||||
|
|
|
@ -95,6 +95,7 @@ private:
|
||||||
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
||||||
|
|
||||||
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
|
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
|
||||||
|
ComPtr<ID3D11RasterizerState> m_wireframe_rasterizer_state;
|
||||||
|
|
||||||
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
|
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
|
||||||
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
|
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
|
||||||
|
|
|
@ -297,12 +297,15 @@ bool GPU_HW_OpenGL::CreateVertexBuffer()
|
||||||
glEnableVertexAttribArray(1);
|
glEnableVertexAttribArray(1);
|
||||||
glEnableVertexAttribArray(2);
|
glEnableVertexAttribArray(2);
|
||||||
glEnableVertexAttribArray(3);
|
glEnableVertexAttribArray(3);
|
||||||
|
glEnableVertexAttribArray(4);
|
||||||
glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
|
glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
|
||||||
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
|
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
|
||||||
reinterpret_cast<void*>(offsetof(BatchVertex, color)));
|
reinterpret_cast<void*>(offsetof(BatchVertex, color)));
|
||||||
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u)));
|
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u)));
|
||||||
glVertexAttribIPointer(3, 1, GL_UNSIGNED_INT, sizeof(BatchVertex),
|
glVertexAttribIPointer(3, 1, GL_UNSIGNED_INT, sizeof(BatchVertex),
|
||||||
reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
|
reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
|
||||||
|
glVertexAttribPointer(4, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
|
||||||
|
reinterpret_cast<void*>(offsetof(BatchVertex, uv_limits)));
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
|
|
||||||
glGenVertexArrays(1, &m_attributeless_vao_id);
|
glGenVertexArrays(1, &m_attributeless_vao_id);
|
||||||
|
@ -367,6 +370,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
{
|
{
|
||||||
prog.BindAttribute(2, "a_texcoord");
|
prog.BindAttribute(2, "a_texcoord");
|
||||||
prog.BindAttribute(3, "a_texpage");
|
prog.BindAttribute(3, "a_texpage");
|
||||||
|
prog.BindAttribute(4, "a_uv_limits");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!IsGLES() || m_supports_dual_source_blend)
|
if (!IsGLES() || m_supports_dual_source_blend)
|
||||||
|
|
|
@ -508,8 +508,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
|
||||||
const char* output_block_suffix = upscaled_lines ? "VS" : "";
|
const char* output_block_suffix = upscaled_lines ? "VS" : "";
|
||||||
if (textured)
|
if (textured)
|
||||||
{
|
{
|
||||||
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
|
DeclareVertexEntryPoint(
|
||||||
{{"nointerpolation", "uint4 v_texpage"}}, false, output_block_suffix);
|
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
|
||||||
|
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false, output_block_suffix);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -557,6 +558,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
|
||||||
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE;
|
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE;
|
||||||
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
|
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
|
||||||
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
|
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
|
||||||
|
|
||||||
|
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
@ -658,13 +661,7 @@ uint2 FloatToIntegerCoords(float2 coords)
|
||||||
float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
||||||
{
|
{
|
||||||
#if PALETTE
|
#if PALETTE
|
||||||
// We can't currently use upscaled coordinate for palettes because of how they're packed.
|
|
||||||
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
|
|
||||||
#if !TEXTURE_FILTERING
|
|
||||||
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
|
||||||
#endif
|
|
||||||
uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
|
uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
|
||||||
|
|
||||||
uint2 index_coord = icoord;
|
uint2 index_coord = icoord;
|
||||||
#if PALETTE_4_BIT
|
#if PALETTE_4_BIT
|
||||||
index_coord.x /= 4u;
|
index_coord.x /= 4u;
|
||||||
|
@ -698,12 +695,43 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
||||||
return SAMPLE_TEXTURE(samp0, float2(direct_icoord) * RCP_VRAM_SIZE);
|
return SAMPLE_TEXTURE(samp0, float2(direct_icoord) * RCP_VRAM_SIZE);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
|
||||||
|
out float4 texcol, out float ialpha)
|
||||||
|
{
|
||||||
|
// Compute the coordinates of the four texels we will be interpolating between.
|
||||||
|
// Clamp this to the triangle texture coordinates.
|
||||||
|
float2 texel_top_left = frac(coords) - float2(0.5, 0.5);
|
||||||
|
float2 texel_offset = sign(texel_top_left);
|
||||||
|
float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
|
||||||
|
float4(0.0, 0.0, 0.0, 0.0));
|
||||||
|
|
||||||
|
// Load four texels.
|
||||||
|
float4 s00 = SampleFromVRAM(texpage, clamp(fcoords.xy, uv_limits.xy, uv_limits.zw));
|
||||||
|
float4 s10 = SampleFromVRAM(texpage, clamp(fcoords.zy, uv_limits.xy, uv_limits.zw));
|
||||||
|
float4 s01 = SampleFromVRAM(texpage, clamp(fcoords.xw, uv_limits.xy, uv_limits.zw));
|
||||||
|
float4 s11 = SampleFromVRAM(texpage, clamp(fcoords.zw, uv_limits.xy, uv_limits.zw));
|
||||||
|
|
||||||
|
// Compute alpha from how many texels aren't pixel color 0000h.
|
||||||
|
float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
|
||||||
|
float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
|
||||||
|
float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
|
||||||
|
float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));
|
||||||
|
|
||||||
|
// Bilinearly interpolate.
|
||||||
|
float2 weights = abs(texel_top_left);
|
||||||
|
texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
|
||||||
|
ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
)";
|
)";
|
||||||
|
|
||||||
if (textured)
|
if (textured)
|
||||||
{
|
{
|
||||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true);
|
DeclareFragmentEntryPoint(ss, 1, 1,
|
||||||
|
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true,
|
||||||
|
use_dual_source ? 2 : 1, true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -725,48 +753,35 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if TEXTURED
|
#if TEXTURED
|
||||||
|
float2 coords = v_tex0;
|
||||||
|
float4 uv_limits = v_uv_limits;
|
||||||
|
float4 texcol;
|
||||||
|
|
||||||
|
// We can't currently use upscaled coordinate for palettes because of how they're packed.
|
||||||
|
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
|
||||||
|
#if PALETTE
|
||||||
|
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||||
|
#else
|
||||||
|
uv_limits *= float4(RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if TEXTURE_FILTERING
|
#if TEXTURE_FILTERING
|
||||||
// Compute the coordinates of the four texels we will be interpolating between.
|
BilinearSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
|
||||||
// TODO: Find some way to clamp this to the triangle texture coordinates?
|
|
||||||
float2 downscaled_coords = v_tex0;
|
|
||||||
#if PALETTE
|
|
||||||
downscaled_coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
|
||||||
#endif
|
|
||||||
float2 texel_top_left = frac(downscaled_coords) - float2(0.5, 0.5);
|
|
||||||
float2 texel_offset = sign(texel_top_left);
|
|
||||||
float4 fcoords = max(downscaled_coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
|
|
||||||
float4(0.0, 0.0, 0.0, 0.0));
|
|
||||||
|
|
||||||
// Load four texels.
|
|
||||||
float4 s00 = SampleFromVRAM(v_texpage, fcoords.xy);
|
|
||||||
float4 s10 = SampleFromVRAM(v_texpage, fcoords.zy);
|
|
||||||
float4 s01 = SampleFromVRAM(v_texpage, fcoords.xw);
|
|
||||||
float4 s11 = SampleFromVRAM(v_texpage, fcoords.zw);
|
|
||||||
|
|
||||||
// Compute alpha from how many texels aren't pixel color 0000h.
|
|
||||||
float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
|
|
||||||
float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
|
|
||||||
float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
|
|
||||||
float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));
|
|
||||||
|
|
||||||
// Bilinearly interpolate.
|
|
||||||
float2 weights = abs(texel_top_left);
|
|
||||||
float4 texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
|
|
||||||
ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
|
|
||||||
if (ialpha < 0.5)
|
if (ialpha < 0.5)
|
||||||
discard;
|
discard;
|
||||||
|
|
||||||
texcol.rgb /= float3(ialpha, ialpha, ialpha);
|
texcol.rgb /= float3(ialpha, ialpha, ialpha);
|
||||||
semitransparent = (texcol.a != 0.0);
|
semitransparent = (texcol.a != 0.0);
|
||||||
#else
|
#else
|
||||||
float4 texcol = SampleFromVRAM(v_texpage, v_tex0);
|
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw));
|
||||||
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
|
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
|
||||||
discard;
|
discard;
|
||||||
|
|
||||||
semitransparent = (texcol.a != 0.0);
|
|
||||||
ialpha = 1.0;
|
ialpha = 1.0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
semitransparent = (texcol.a != 0.0);
|
||||||
|
|
||||||
// If not using true color, truncate the framebuffer colors to 5-bit.
|
// If not using true color, truncate the framebuffer colors to 5-bit.
|
||||||
#if !TRUE_COLOR
|
#if !TRUE_COLOR
|
||||||
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
|
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
|
||||||
|
|
|
@ -646,6 +646,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
{
|
{
|
||||||
gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u));
|
gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u));
|
||||||
gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage));
|
gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage));
|
||||||
|
gpbuilder.AddVertexAttribute(4, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, uv_limits));
|
||||||
}
|
}
|
||||||
|
|
||||||
gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
|
gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
|
||||||
|
|
Loading…
Reference in New Issue