GPU: Implement weave deinterlacing

This commit is contained in:
Connor McLaughlin 2019-10-23 15:36:37 +10:00
parent 87f9f99938
commit 2d0dd03705
5 changed files with 101 additions and 74 deletions

View File

@ -342,12 +342,6 @@ void GPU::UpdateCRTCConfig()
cs.display_width = std::max<u32>(cs.visible_ticks_per_scanline / cs.dot_clock_divider, 1); cs.display_width = std::max<u32>(cs.visible_ticks_per_scanline / cs.dot_clock_divider, 1);
cs.display_height = cs.visible_scanlines_per_frame; cs.display_height = cs.visible_scanlines_per_frame;
if (m_GPUSTAT.vertical_interlace)
{
// Force progressive for now.
cs.display_height *= 2;
}
if (cs.display_width != old_horizontal_resolution || cs.display_height != old_vertical_resolution) if (cs.display_width != old_horizontal_resolution || cs.display_height != old_vertical_resolution)
Log_InfoPrintf("Visible resolution is now %ux%u", cs.display_width, cs.display_height); Log_InfoPrintf("Visible resolution is now %ux%u", cs.display_width, cs.display_height);

View File

@ -406,61 +406,80 @@ void main()
return ss.str(); return ss.str();
} }
std::string GPU_HW::GenerateRGB24DecodeFragmentShader() std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
{ {
std::stringstream ss; std::stringstream ss;
GenerateShaderHeader(ss); GenerateShaderHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlaced);
ss << R"( ss << R"(
in vec2 v_tex0; in vec2 v_tex0;
out vec4 o_col0; out vec4 o_col0;
uniform sampler2D samp0; uniform sampler2D samp0;
uniform ivec2 u_base_coords; uniform ivec3 u_base_coords;
ivec2 GetCoords(vec2 fragcoord)
{
ivec2 icoords = ivec2(fragcoord);
#if INTERLACED
if (((icoords.y - u_base_coords.z) & 1) != 0)
discard;
#endif
return icoords;
}
void main() void main()
{ {
// compute offset in dwords from the start of the 24-bit values ivec2 icoords = GetCoords(gl_FragCoord.xy);
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + int(gl_FragCoord.y));
int xoff = int(gl_FragCoord.x);
int dword_index = (xoff / 2) + (xoff / 4);
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these #if DEPTH_24BIT
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0)); // compute offset in dwords from the start of the 24-bit values
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0)); ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0)); int xoff = int(icoords.x);
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0)); int dword_index = (xoff / 2) + (xoff / 4);
// select the bit for this pixel depending on its offset in the 4-pixel block // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
uint r, g, b; uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
int block_offset = xoff & 3; uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
if (block_offset == 0) uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
{ uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// and normalize // select the bit for this pixel depending on its offset in the 4-pixel block
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0); uint r, g, b;
int block_offset = xoff & 3;
if (block_offset == 0)
{
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// and normalize
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0);
#else
// load and return
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
#endif
} }
)"; )";

View File

@ -97,7 +97,7 @@ protected:
TextureColorMode texture_color_mode, bool blending); TextureColorMode texture_color_mode, bool blending);
std::string GenerateScreenQuadVertexShader(); std::string GenerateScreenQuadVertexShader();
std::string GenerateFillFragmentShader(); std::string GenerateFillFragmentShader();
std::string GenerateRGB24DecodeFragmentShader(); std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
u32 m_resolution_scale = 1; u32 m_resolution_scale = 1;
HWRenderBatch m_batch = {}; HWRenderBatch m_batch = {};

View File

@ -274,19 +274,27 @@ bool GPU_HW_OpenGL::CompilePrograms()
} }
// TODO: Use string_view // TODO: Use string_view
if (!m_reinterpret_rgb8_program.Compile(GenerateScreenQuadVertexShader().c_str(), for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
GenerateRGB24DecodeFragmentShader().c_str()))
{ {
return false; for (u8 interlaced = 0; interlaced < 2; interlaced++)
} {
m_reinterpret_rgb8_program.BindFragData(0, "o_col0"); GL::Program& prog = m_display_programs[depth_24bit][interlaced];
if (!m_reinterpret_rgb8_program.Link()) const std::string vs = GenerateScreenQuadVertexShader();
return false; const std::string fs =
GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced));
if (!prog.Compile(vs.c_str(), fs.c_str()))
return false;
m_reinterpret_rgb8_program.Bind(); prog.BindFragData(0, "o_col0");
m_reinterpret_rgb8_program.RegisterUniform("u_base_coords"); if (!prog.Link())
m_reinterpret_rgb8_program.RegisterUniform("samp0"); return false;
m_reinterpret_rgb8_program.Uniform1i(1, 0);
prog.Bind();
prog.RegisterUniform("u_base_coords");
prog.RegisterUniform("samp0");
prog.Uniform1i(1, 0);
}
}
return true; return true;
} }
@ -400,24 +408,36 @@ void GPU_HW_OpenGL::UpdateDisplay()
} }
else else
{ {
const u32 field_offset = BoolToUInt8(m_GPUSTAT.vertical_interlace && !m_GPUSTAT.drawing_even_line);
const u32 vram_offset_x = m_crtc_state.regs.X; const u32 vram_offset_x = m_crtc_state.regs.X;
const u32 vram_offset_y = m_crtc_state.regs.Y; const u32 vram_offset_y = m_crtc_state.regs.Y;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = std::min<u32>(m_crtc_state.display_width, VRAM_WIDTH - vram_offset_x); const u32 display_width = std::min<u32>(m_crtc_state.display_width, VRAM_WIDTH - vram_offset_x);
const u32 display_height = std::min<u32>(m_crtc_state.display_height, VRAM_HEIGHT - vram_offset_y); const u32 display_height = std::min<u32>(m_crtc_state.display_height << BoolToUInt8(m_GPUSTAT.vertical_interlace),
VRAM_HEIGHT - vram_offset_y);
const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale;
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height; const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
const u32 scaled_flipped_vram_offset_y = m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height; const u32 scaled_flipped_vram_offset_y = m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height;
if (m_GPUSTAT.display_area_color_depth_24) // fast path when both interlacing and 24-bit depth is off
if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace)
{
glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x,
scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0,
scaled_display_width, scaled_display_height, 1);
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width,
scaled_display_height, m_crtc_state.display_aspect_ratio);
}
else
{ {
glDisable(GL_BLEND); glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
// Because of how the reinterpret shader works, we need to use the downscaled version. // Because of how the reinterpret shader works, we need to use the downscaled version.
if (m_resolution_scale > 1) if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
{ {
m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
@ -432,10 +452,13 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_vram_texture->Bind(); m_vram_texture->Bind();
} }
const GL::Program& prog = m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)]
[BoolToUInt8(m_GPUSTAT.vertical_interlace)];
m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glViewport(0, 0, display_width, display_height); glViewport(0, field_offset, display_width, display_height);
m_reinterpret_rgb8_program.Bind(); prog.Bind();
m_reinterpret_rgb8_program.Uniform2i(0, vram_offset_x, flipped_vram_offset_y); prog.Uniform3i(0, vram_offset_x, flipped_vram_offset_y, field_offset);
glDrawArrays(GL_TRIANGLES, 0, 3); glDrawArrays(GL_TRIANGLES, 0, 3);
// restore state // restore state
@ -446,15 +469,6 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, display_width, display_height, m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_crtc_state.display_aspect_ratio); m_crtc_state.display_aspect_ratio);
} }
else
{
glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x,
scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0,
scaled_display_width, scaled_display_height, 1);
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width,
scaled_display_height, m_crtc_state.display_aspect_ratio);
}
} }
} }

View File

@ -73,7 +73,7 @@ private:
bool m_show_renderer_statistics = false; bool m_show_renderer_statistics = false;
std::array<std::array<std::array<std::array<GL::Program, 2>, 3>, 2>, 4> m_render_programs; std::array<std::array<std::array<std::array<GL::Program, 2>, 3>, 2>, 4> m_render_programs;
GL::Program m_reinterpret_rgb8_program; std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
GLStats m_stats = {}; GLStats m_stats = {};
GLStats m_last_stats = {}; GLStats m_last_stats = {};