GPU: Fix mismatched scanout between hardware/software renderers

This commit is contained in:
Connor McLaughlin 2020-05-12 01:35:02 +10:00
parent 37d9f6344d
commit 81f297456c
4 changed files with 26 additions and 25 deletions

View File

@ -598,7 +598,7 @@ void GPU_HW_D3D11::UpdateDisplay()
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_vram_offset_y <= m_vram_texture.GetHeight()))
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
@ -612,13 +612,13 @@ void GPU_HW_D3D11::UpdateDisplay()
const u32 reinterpret_field_offset = GetInterlacedField();
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_width =
scaled_display_width + ((m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale);
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, reinterpret_field_offset};
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, reinterpret_crop_left,
reinterpret_field_offset};
ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
SetViewportAndScissor(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
SetViewportAndScissor(0, reinterpret_field_offset, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),

View File

@ -599,7 +599,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_vram_offset_y <= m_vram_texture.GetHeight()))
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
@ -621,13 +621,13 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
const u32 reinterpret_field_offset = GetInterlacedField();
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_width =
scaled_display_width + ((m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale);
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_field_offset};
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_crop_left,
reinterpret_field_offset};
UploadUniformBuffer(uniforms, sizeof(uniforms));
m_batch_ubo_dirty = true;
glViewport(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
glViewport(0, reinterpret_field_offset, scaled_display_width, scaled_display_height);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);

View File

@ -995,13 +995,13 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
DefineMacro(ss, "INTERLACED", interlaced);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_field_offset"});
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"});
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1);
ss << R"(
{
uint2 icoords = uint2(v_pos.xy) + u_vram_offset;
uint2 icoords = uint2(v_pos.xy);
#if INTERLACED
if (((icoords.y / RESOLUTION_SCALE) & 1u) != u_field_offset)
@ -1010,12 +1010,12 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
#if DEPTH_24BIT
// relative to start of scanout
uint relative_x = (icoords.x - u_vram_offset.x) / RESOLUTION_SCALE;
icoords.x = u_vram_offset.x + ((relative_x * 3u) / 2u) * RESOLUTION_SCALE;
uint relative_x = (icoords.x + u_crop_left) / RESOLUTION_SCALE;
uint2 vram_coords = u_vram_offset + uint2(((relative_x * 3u) / 2u) * RESOLUTION_SCALE, icoords.y);
// load adjacent 16-bit texels
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(icoords % VRAM_SIZE), 0));
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2((icoords + uint2(RESOLUTION_SCALE, 0)) % VRAM_SIZE), 0));
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(vram_coords % VRAM_SIZE), 0));
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2((vram_coords + uint2(RESOLUTION_SCALE, 0)) % VRAM_SIZE), 0));
// select which part of the combined 16-bit texels we are currently shading
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1u) * 8u);
@ -1025,7 +1025,8 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
float((s1s0 >> 16u) & 0xFFu) / 255.0, 1.0);
#else
// load and return
o_col0 = LOAD_TEXTURE(samp0, int2(icoords % VRAM_SIZE), 0);
uint2 vram_coords = u_vram_offset + uint2(icoords.x + u_crop_left, icoords.y);
o_col0 = LOAD_TEXTURE(samp0, int2(vram_coords % VRAM_SIZE), 0);
#endif
}
)";

View File

@ -154,36 +154,36 @@ void GPU_SW::UpdateDisplay()
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X;
if (IsInterlacedDisplayEnabled())
{
const u32 field = GetInterlacedField();
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width, display_height, true);
VRAM_WIDTH, display_width + texture_offset_x, display_height, true);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width, display_height, true);
VRAM_WIDTH, display_width + texture_offset_x, display_height, true);
}
}
else
{
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
display_height, false);
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
display_height, false);
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false);
}
}
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X;
m_host_display->UpdateTexture(m_display_texture.get(), texture_offset_x, 0, display_width, display_height,
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
display_width, display_height);